xref: /petsc/src/dm/impls/da/fdda.c (revision 1a28a8eb6e2fdc52d030896e44167e52bdfe3748)
1 
2 #include <petsc/private/dmdaimpl.h> /*I      "petscdmda.h"     I*/
3 #include <petscmat.h>
4 #include <petscbt.h>
5 
6 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM, ISColoringType, ISColoring *);
7 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM, ISColoringType, ISColoring *);
8 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM, ISColoringType, ISColoring *);
9 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM, ISColoringType, ISColoring *);
10 
11 /*
12    For ghost i that may be negative or greater than the upper bound this
13   maps it into the 0:m-1 range using periodicity
14 */
15 #define SetInRange(i, m) ((i < 0) ? m + i : ((i >= m) ? i - m : i))
16 
17 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill, PetscInt w, PetscInt **rfill)
18 {
19   PetscInt i, j, nz, *fill;
20 
21   PetscFunctionBegin;
22   if (!dfill) PetscFunctionReturn(PETSC_SUCCESS);
23 
24   /* count number nonzeros */
25   nz = 0;
26   for (i = 0; i < w; i++) {
27     for (j = 0; j < w; j++) {
28       if (dfill[w * i + j]) nz++;
29     }
30   }
31   PetscCall(PetscMalloc1(nz + w + 1, &fill));
32   /* construct modified CSR storage of nonzero structure */
33   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
34    so fill[1] - fill[0] gives number of nonzeros in first row etc */
35   nz = w + 1;
36   for (i = 0; i < w; i++) {
37     fill[i] = nz;
38     for (j = 0; j < w; j++) {
39       if (dfill[w * i + j]) {
40         fill[nz] = j;
41         nz++;
42       }
43     }
44   }
45   fill[w] = nz;
46 
47   *rfill = fill;
48   PetscFunctionReturn(PETSC_SUCCESS);
49 }
50 
51 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse, PetscInt w, PetscInt **rfill)
52 {
53   PetscInt nz;
54 
55   PetscFunctionBegin;
56   if (!dfillsparse) PetscFunctionReturn(PETSC_SUCCESS);
57 
58   /* Determine number of non-zeros */
59   nz = (dfillsparse[w] - w - 1);
60 
61   /* Allocate space for our copy of the given sparse matrix representation. */
62   PetscCall(PetscMalloc1(nz + w + 1, rfill));
63   PetscCall(PetscArraycpy(*rfill, dfillsparse, nz + w + 1));
64   PetscFunctionReturn(PETSC_SUCCESS);
65 }
66 
67 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
68 {
69   PetscInt i, k, cnt = 1;
70 
71   PetscFunctionBegin;
72 
73   /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of
74    columns to the left with any nonzeros in them plus 1 */
75   PetscCall(PetscCalloc1(dd->w, &dd->ofillcols));
76   for (i = 0; i < dd->w; i++) {
77     for (k = dd->ofill[i]; k < dd->ofill[i + 1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
78   }
79   for (i = 0; i < dd->w; i++) {
80     if (dd->ofillcols[i]) dd->ofillcols[i] = cnt++;
81   }
82   PetscFunctionReturn(PETSC_SUCCESS);
83 }
84 
85 /*@
86     DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
87     of the matrix returned by `DMCreateMatrix()`.
88 
89     Logically Collective
90 
91     Input Parameters:
92 +   da - the distributed array
93 .   dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
94 -   ofill - the fill pattern in the off-diagonal blocks
95 
96     Level: developer
97 
98     Notes:
99     This only makes sense when you are doing multicomponent problems but using the
100        `MATMPIAIJ` matrix format
101 
102            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
103        representing coupling and 0 entries for missing coupling. For example
104 .vb
105             dfill[9] = {1, 0, 0,
106                         1, 1, 0,
107                         0, 1, 1}
108 .ve
109        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
110        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
111        diagonal block).
112 
113      `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns then
114      can be represented in the dfill, ofill format
115 
116    Contributed by Glenn Hammond
117 
118 .seealso: `DM`, `DMDA`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
119 @*/
120 PetscErrorCode DMDASetBlockFills(DM da, const PetscInt *dfill, const PetscInt *ofill)
121 {
122   DM_DA *dd = (DM_DA *)da->data;
123 
124   PetscFunctionBegin;
125   /* save the given dfill and ofill information */
126   PetscCall(DMDASetBlockFills_Private(dfill, dd->w, &dd->dfill));
127   PetscCall(DMDASetBlockFills_Private(ofill, dd->w, &dd->ofill));
128 
129   /* count nonzeros in ofill columns */
130   PetscCall(DMDASetBlockFills_Private2(dd));
131   PetscFunctionReturn(PETSC_SUCCESS);
132 }
133 
134 /*@
135     DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
136     of the matrix returned by `DMCreateMatrix()`, using sparse representations
137     of fill patterns.
138 
139     Logically Collective
140 
141     Input Parameters:
142 +   da - the distributed array
143 .   dfill - the sparse fill pattern in the diagonal block (may be `NULL`, means use dense block)
144 -   ofill - the sparse fill pattern in the off-diagonal blocks
145 
146     Level: developer
147 
148     Notes:
149     This only makes sense when you are doing multicomponent problems but using the
150        `MATMPIAIJ` matrix format
151 
152            The format for `dfill` and `ofill` is a sparse representation of a
153            dof-by-dof matrix with 1 entries representing coupling and 0 entries
154            for missing coupling.  The sparse representation is a 1 dimensional
155            array of length nz + dof + 1, where nz is the number of non-zeros in
156            the matrix.  The first dof entries in the array give the
157            starting array indices of each row's items in the rest of the array,
158            the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
159            and the remaining nz items give the column indices of each of
160            the 1s within the logical 2D matrix.  Each row's items within
161            the array are the column indices of the 1s within that row
162            of the 2D matrix.  PETSc developers may recognize that this is the
163            same format as that computed by the `DMDASetBlockFills_Private()`
164            function from a dense 2D matrix representation.
165 
166      `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns then
167      can be represented in the `dfill`, `ofill` format
168 
169    Contributed by Philip C. Roth
170 
171 .seealso: `DM`, `DMDA`, `DMDASetBlockFills()`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
172 @*/
173 PetscErrorCode DMDASetBlockFillsSparse(DM da, const PetscInt *dfillsparse, const PetscInt *ofillsparse)
174 {
175   DM_DA *dd = (DM_DA *)da->data;
176 
177   PetscFunctionBegin;
178   /* save the given dfill and ofill information */
179   PetscCall(DMDASetBlockFillsSparse_Private(dfillsparse, dd->w, &dd->dfill));
180   PetscCall(DMDASetBlockFillsSparse_Private(ofillsparse, dd->w, &dd->ofill));
181 
182   /* count nonzeros in ofill columns */
183   PetscCall(DMDASetBlockFills_Private2(dd));
184   PetscFunctionReturn(PETSC_SUCCESS);
185 }
186 
187 PetscErrorCode DMCreateColoring_DA(DM da, ISColoringType ctype, ISColoring *coloring)
188 {
189   PetscInt       dim, m, n, p, nc;
190   DMBoundaryType bx, by, bz;
191   MPI_Comm       comm;
192   PetscMPIInt    size;
193   PetscBool      isBAIJ;
194   DM_DA         *dd = (DM_DA *)da->data;
195 
196   PetscFunctionBegin;
197   /*
198                                   m
199           ------------------------------------------------------
200          |                                                     |
201          |                                                     |
202          |               ----------------------                |
203          |               |                    |                |
204       n  |           yn  |                    |                |
205          |               |                    |                |
206          |               .---------------------                |
207          |             (xs,ys)     xn                          |
208          |            .                                        |
209          |         (gxs,gys)                                   |
210          |                                                     |
211           -----------------------------------------------------
212   */
213 
214   /*
215          nc - number of components per grid point
216          col - number of colors needed in one direction for single component problem
217 
218   */
219   PetscCall(DMDAGetInfo(da, &dim, NULL, NULL, NULL, &m, &n, &p, &nc, NULL, &bx, &by, &bz, NULL));
220 
221   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
222   PetscCallMPI(MPI_Comm_size(comm, &size));
223   if (ctype == IS_COLORING_LOCAL) {
224     if (size == 1) {
225       ctype = IS_COLORING_GLOBAL;
226     } else {
227       PetscCheck((dim == 1) || !((m == 1 && bx == DM_BOUNDARY_PERIODIC) || (n == 1 && by == DM_BOUNDARY_PERIODIC) || (p == 1 && bz == DM_BOUNDARY_PERIODIC)), PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain on the same process");
228     }
229   }
230 
231   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
232      matrices is for the blocks, not the individual matrix elements  */
233   PetscCall(PetscStrbeginswith(da->mattype, MATBAIJ, &isBAIJ));
234   if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATMPIBAIJ, &isBAIJ));
235   if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATSEQBAIJ, &isBAIJ));
236   if (isBAIJ) {
237     dd->w  = 1;
238     dd->xs = dd->xs / nc;
239     dd->xe = dd->xe / nc;
240     dd->Xs = dd->Xs / nc;
241     dd->Xe = dd->Xe / nc;
242   }
243 
244   /*
245      We do not provide a getcoloring function in the DMDA operations because
246    the basic DMDA does not know about matrices. We think of DMDA as being
247    more low-level then matrices.
248   */
249   if (dim == 1) PetscCall(DMCreateColoring_DA_1d_MPIAIJ(da, ctype, coloring));
250   else if (dim == 2) PetscCall(DMCreateColoring_DA_2d_MPIAIJ(da, ctype, coloring));
251   else if (dim == 3) PetscCall(DMCreateColoring_DA_3d_MPIAIJ(da, ctype, coloring));
252   else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not done for %" PetscInt_FMT " dimension, send us mail petsc-maint@mcs.anl.gov for code", dim);
253   if (isBAIJ) {
254     dd->w  = nc;
255     dd->xs = dd->xs * nc;
256     dd->xe = dd->xe * nc;
257     dd->Xs = dd->Xs * nc;
258     dd->Xe = dd->Xe * nc;
259   }
260   PetscFunctionReturn(PETSC_SUCCESS);
261 }
262 
263 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
264 {
265   PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, M, N, dim, s, k, nc, col;
266   PetscInt         ncolors = 0;
267   MPI_Comm         comm;
268   DMBoundaryType   bx, by;
269   DMDAStencilType  st;
270   ISColoringValue *colors;
271   DM_DA           *dd = (DM_DA *)da->data;
272 
273   PetscFunctionBegin;
274   /*
275          nc - number of components per grid point
276          col - number of colors needed in one direction for single component problem
277 
278   */
279   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
280   col = 2 * s + 1;
281   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
282   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
283   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
284 
285   /* special case as taught to us by Paul Hovland */
286   if (st == DMDA_STENCIL_STAR && s == 1) {
287     PetscCall(DMCreateColoring_DA_2d_5pt_MPIAIJ(da, ctype, coloring));
288   } else {
289     if (ctype == IS_COLORING_GLOBAL) {
290       if (!dd->localcoloring) {
291         PetscCall(PetscMalloc1(nc * nx * ny, &colors));
292         ii = 0;
293         for (j = ys; j < ys + ny; j++) {
294           for (i = xs; i < xs + nx; i++) {
295             for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((i % col) + col * (j % col));
296           }
297         }
298         ncolors = nc + nc * (col - 1 + col * (col - 1));
299         PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
300       }
301       *coloring = dd->localcoloring;
302     } else if (ctype == IS_COLORING_LOCAL) {
303       if (!dd->ghostedcoloring) {
304         PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
305         ii = 0;
306         for (j = gys; j < gys + gny; j++) {
307           for (i = gxs; i < gxs + gnx; i++) {
308             for (k = 0; k < nc; k++) {
309               /* the complicated stuff is to handle periodic boundaries */
310               colors[ii++] = k + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col));
311             }
312           }
313         }
314         ncolors = nc + nc * (col - 1 + col * (col - 1));
315         PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
316         /* PetscIntView(ncolors,(PetscInt*)colors,0); */
317 
318         PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
319       }
320       *coloring = dd->ghostedcoloring;
321     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
322   }
323   PetscCall(ISColoringReference(*coloring));
324   PetscFunctionReturn(PETSC_SUCCESS);
325 }
326 
327 PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
328 {
329   PetscInt         xs, ys, nx, ny, i, j, gxs, gys, gnx, gny, m, n, p, dim, s, k, nc, col, zs, gzs, ii, l, nz, gnz, M, N, P;
330   PetscInt         ncolors;
331   MPI_Comm         comm;
332   DMBoundaryType   bx, by, bz;
333   DMDAStencilType  st;
334   ISColoringValue *colors;
335   DM_DA           *dd = (DM_DA *)da->data;
336 
337   PetscFunctionBegin;
338   /*
339          nc - number of components per grid point
340          col - number of colors needed in one direction for single component problem
341   */
342   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
343   col = 2 * s + 1;
344   PetscCheck(bx != DM_BOUNDARY_PERIODIC || (m % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in X is divisible\n\
345                  by 2*stencil_width + 1\n");
346   PetscCheck(by != DM_BOUNDARY_PERIODIC || (n % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Y is divisible\n\
347                  by 2*stencil_width + 1\n");
348   PetscCheck(bz != DM_BOUNDARY_PERIODIC || (p % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Z is divisible\n\
349                  by 2*stencil_width + 1\n");
350 
351   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
352   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
353   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
354 
355   /* create the coloring */
356   if (ctype == IS_COLORING_GLOBAL) {
357     if (!dd->localcoloring) {
358       PetscCall(PetscMalloc1(nc * nx * ny * nz, &colors));
359       ii = 0;
360       for (k = zs; k < zs + nz; k++) {
361         for (j = ys; j < ys + ny; j++) {
362           for (i = xs; i < xs + nx; i++) {
363             for (l = 0; l < nc; l++) colors[ii++] = l + nc * ((i % col) + col * (j % col) + col * col * (k % col));
364           }
365         }
366       }
367       ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
368       PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny * nz, colors, PETSC_OWN_POINTER, &dd->localcoloring));
369     }
370     *coloring = dd->localcoloring;
371   } else if (ctype == IS_COLORING_LOCAL) {
372     if (!dd->ghostedcoloring) {
373       PetscCall(PetscMalloc1(nc * gnx * gny * gnz, &colors));
374       ii = 0;
375       for (k = gzs; k < gzs + gnz; k++) {
376         for (j = gys; j < gys + gny; j++) {
377           for (i = gxs; i < gxs + gnx; i++) {
378             for (l = 0; l < nc; l++) {
379               /* the complicated stuff is to handle periodic boundaries */
380               colors[ii++] = l + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col) + col * col * (SetInRange(k, p) % col));
381             }
382           }
383         }
384       }
385       ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
386       PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny * gnz, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
387       PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
388     }
389     *coloring = dd->ghostedcoloring;
390   } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
391   PetscCall(ISColoringReference(*coloring));
392   PetscFunctionReturn(PETSC_SUCCESS);
393 }
394 
395 PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
396 {
397   PetscInt         xs, nx, i, i1, gxs, gnx, l, m, M, dim, s, nc, col;
398   PetscInt         ncolors;
399   MPI_Comm         comm;
400   DMBoundaryType   bx;
401   ISColoringValue *colors;
402   DM_DA           *dd = (DM_DA *)da->data;
403 
404   PetscFunctionBegin;
405   /*
406          nc - number of components per grid point
407          col - number of colors needed in one direction for single component problem
408   */
409   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, &M, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
410   col = 2 * s + 1;
411   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
412   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
413   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
414 
415   /* create the coloring */
416   if (ctype == IS_COLORING_GLOBAL) {
417     if (!dd->localcoloring) {
418       PetscCall(PetscMalloc1(nc * nx, &colors));
419       if (dd->ofillcols) {
420         PetscInt tc = 0;
421         for (i = 0; i < nc; i++) tc += (PetscInt)(dd->ofillcols[i] > 0);
422         i1 = 0;
423         for (i = xs; i < xs + nx; i++) {
424           for (l = 0; l < nc; l++) {
425             if (dd->ofillcols[l] && (i % col)) {
426               colors[i1++] = nc - 1 + tc * ((i % col) - 1) + dd->ofillcols[l];
427             } else {
428               colors[i1++] = l;
429             }
430           }
431         }
432         ncolors = nc + 2 * s * tc;
433       } else {
434         i1 = 0;
435         for (i = xs; i < xs + nx; i++) {
436           for (l = 0; l < nc; l++) colors[i1++] = l + nc * (i % col);
437         }
438         ncolors = nc + nc * (col - 1);
439       }
440       PetscCall(ISColoringCreate(comm, ncolors, nc * nx, colors, PETSC_OWN_POINTER, &dd->localcoloring));
441     }
442     *coloring = dd->localcoloring;
443   } else if (ctype == IS_COLORING_LOCAL) {
444     if (!dd->ghostedcoloring) {
445       PetscCall(PetscMalloc1(nc * gnx, &colors));
446       i1 = 0;
447       for (i = gxs; i < gxs + gnx; i++) {
448         for (l = 0; l < nc; l++) {
449           /* the complicated stuff is to handle periodic boundaries */
450           colors[i1++] = l + nc * (SetInRange(i, m) % col);
451         }
452       }
453       ncolors = nc + nc * (col - 1);
454       PetscCall(ISColoringCreate(comm, ncolors, nc * gnx, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
455       PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
456     }
457     *coloring = dd->ghostedcoloring;
458   } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
459   PetscCall(ISColoringReference(*coloring));
460   PetscFunctionReturn(PETSC_SUCCESS);
461 }
462 
463 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
464 {
465   PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, dim, s, k, nc;
466   PetscInt         ncolors;
467   MPI_Comm         comm;
468   DMBoundaryType   bx, by;
469   ISColoringValue *colors;
470   DM_DA           *dd = (DM_DA *)da->data;
471 
472   PetscFunctionBegin;
473   /*
474          nc - number of components per grid point
475          col - number of colors needed in one direction for single component problem
476   */
477   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, NULL));
478   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
479   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
480   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
481   /* create the coloring */
482   if (ctype == IS_COLORING_GLOBAL) {
483     if (!dd->localcoloring) {
484       PetscCall(PetscMalloc1(nc * nx * ny, &colors));
485       ii = 0;
486       for (j = ys; j < ys + ny; j++) {
487         for (i = xs; i < xs + nx; i++) {
488           for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * j + i) % 5);
489         }
490       }
491       ncolors = 5 * nc;
492       PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
493     }
494     *coloring = dd->localcoloring;
495   } else if (ctype == IS_COLORING_LOCAL) {
496     if (!dd->ghostedcoloring) {
497       PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
498       ii = 0;
499       for (j = gys; j < gys + gny; j++) {
500         for (i = gxs; i < gxs + gnx; i++) {
501           for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * SetInRange(j, n) + SetInRange(i, m)) % 5);
502         }
503       }
504       ncolors = 5 * nc;
505       PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
506       PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
507     }
508     *coloring = dd->ghostedcoloring;
509   } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
510   PetscFunctionReturn(PETSC_SUCCESS);
511 }
512 
513 /* =========================================================================== */
514 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM, Mat);
515 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM, Mat);
516 extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM, Mat);
517 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM, Mat);
518 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM, Mat);
519 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM, Mat);
520 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM, Mat);
521 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM, Mat);
522 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM, Mat);
523 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM, Mat);
524 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM, Mat);
525 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM, Mat);
526 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM, Mat);
527 extern PetscErrorCode DMCreateMatrix_DA_IS(DM, Mat);
528 
529 /*@C
530    MatSetupDM - Sets the `DMDA` that is to be used by the HYPRE_StructMatrix PETSc matrix
531 
532    Logically Collective
533 
534    Input Parameters:
535 +  mat - the matrix
536 -  da - the da
537 
538    Level: intermediate
539 
540 .seealso: `DMDA`, `Mat`, `MatSetUp()`
541 @*/
542 PetscErrorCode MatSetupDM(Mat mat, DM da)
543 {
544   PetscFunctionBegin;
545   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
546   PetscValidHeaderSpecificType(da, DM_CLASSID, 2, DMDA);
547   PetscTryMethod(mat, "MatSetupDM_C", (Mat, DM), (mat, da));
548   PetscFunctionReturn(PETSC_SUCCESS);
549 }
550 
551 PetscErrorCode MatView_MPI_DA(Mat A, PetscViewer viewer)
552 {
553   DM                da;
554   const char       *prefix;
555   Mat               Anatural;
556   AO                ao;
557   PetscInt          rstart, rend, *petsc, i;
558   IS                is;
559   MPI_Comm          comm;
560   PetscViewerFormat format;
561 
562   PetscFunctionBegin;
563   /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
564   PetscCall(PetscViewerGetFormat(viewer, &format));
565   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(PETSC_SUCCESS);
566 
567   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
568   PetscCall(MatGetDM(A, &da));
569   PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");
570 
571   PetscCall(DMDAGetAO(da, &ao));
572   PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
573   PetscCall(PetscMalloc1(rend - rstart, &petsc));
574   for (i = rstart; i < rend; i++) petsc[i - rstart] = i;
575   PetscCall(AOApplicationToPetsc(ao, rend - rstart, petsc));
576   PetscCall(ISCreateGeneral(comm, rend - rstart, petsc, PETSC_OWN_POINTER, &is));
577 
578   /* call viewer on natural ordering */
579   PetscCall(MatCreateSubMatrix(A, is, is, MAT_INITIAL_MATRIX, &Anatural));
580   PetscCall(ISDestroy(&is));
581   PetscCall(PetscObjectGetOptionsPrefix((PetscObject)A, &prefix));
582   PetscCall(PetscObjectSetOptionsPrefix((PetscObject)Anatural, prefix));
583   PetscCall(PetscObjectSetName((PetscObject)Anatural, ((PetscObject)A)->name));
584   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
585   PetscCall(MatView(Anatural, viewer));
586   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
587   PetscCall(MatDestroy(&Anatural));
588   PetscFunctionReturn(PETSC_SUCCESS);
589 }
590 
591 PetscErrorCode MatLoad_MPI_DA(Mat A, PetscViewer viewer)
592 {
593   DM       da;
594   Mat      Anatural, Aapp;
595   AO       ao;
596   PetscInt rstart, rend, *app, i, m, n, M, N;
597   IS       is;
598   MPI_Comm comm;
599 
600   PetscFunctionBegin;
601   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
602   PetscCall(MatGetDM(A, &da));
603   PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");
604 
605   /* Load the matrix in natural ordering */
606   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Anatural));
607   PetscCall(MatSetType(Anatural, ((PetscObject)A)->type_name));
608   PetscCall(MatGetSize(A, &M, &N));
609   PetscCall(MatGetLocalSize(A, &m, &n));
610   PetscCall(MatSetSizes(Anatural, m, n, M, N));
611   PetscCall(MatLoad(Anatural, viewer));
612 
613   /* Map natural ordering to application ordering and create IS */
614   PetscCall(DMDAGetAO(da, &ao));
615   PetscCall(MatGetOwnershipRange(Anatural, &rstart, &rend));
616   PetscCall(PetscMalloc1(rend - rstart, &app));
617   for (i = rstart; i < rend; i++) app[i - rstart] = i;
618   PetscCall(AOPetscToApplication(ao, rend - rstart, app));
619   PetscCall(ISCreateGeneral(comm, rend - rstart, app, PETSC_OWN_POINTER, &is));
620 
621   /* Do permutation and replace header */
622   PetscCall(MatCreateSubMatrix(Anatural, is, is, MAT_INITIAL_MATRIX, &Aapp));
623   PetscCall(MatHeaderReplace(A, &Aapp));
624   PetscCall(ISDestroy(&is));
625   PetscCall(MatDestroy(&Anatural));
626   PetscFunctionReturn(PETSC_SUCCESS);
627 }
628 
629 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
630 {
631   PetscInt    dim, dof, nx, ny, nz, dims[3], starts[3], M, N, P;
632   Mat         A;
633   MPI_Comm    comm;
634   MatType     Atype;
635   MatType     mtype;
636   PetscMPIInt size;
637   DM_DA      *dd    = (DM_DA *)da->data;
638   void (*aij)(void) = NULL, (*baij)(void) = NULL, (*sbaij)(void) = NULL, (*sell)(void) = NULL, (*is)(void) = NULL;
639 
640   PetscFunctionBegin;
641   PetscCall(MatInitializePackage());
642   mtype = da->mattype;
643 
644   /*
645                                   m
646           ------------------------------------------------------
647          |                                                     |
648          |                                                     |
649          |               ----------------------                |
650          |               |                    |                |
651       n  |           ny  |                    |                |
652          |               |                    |                |
653          |               .---------------------                |
654          |             (xs,ys)     nx                          |
655          |            .                                        |
656          |         (gxs,gys)                                   |
657          |                                                     |
658           -----------------------------------------------------
659   */
660 
661   /*
662          nc - number of components per grid point
663          col - number of colors needed in one direction for single component problem
664   */
665   M   = dd->M;
666   N   = dd->N;
667   P   = dd->P;
668   dim = da->dim;
669   dof = dd->w;
670   /* PetscCall(DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); */
671   PetscCall(DMDAGetCorners(da, NULL, NULL, NULL, &nx, &ny, &nz));
672   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
673   PetscCall(MatCreate(comm, &A));
674   PetscCall(MatSetSizes(A, dof * nx * ny * nz, dof * nx * ny * nz, dof * M * N * P, dof * M * N * P));
675   PetscCall(MatSetType(A, mtype));
676   PetscCall(MatSetFromOptions(A));
677   if (dof * nx * ny * nz < da->bind_below) {
678     PetscCall(MatSetBindingPropagates(A, PETSC_TRUE));
679     PetscCall(MatBindToCPU(A, PETSC_TRUE));
680   }
681   PetscCall(MatSetDM(A, da));
682   if (da->structure_only) PetscCall(MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE));
683   PetscCall(MatGetType(A, &Atype));
684   /*
685      We do not provide a getmatrix function in the DMDA operations because
686    the basic DMDA does not know about matrices. We think of DMDA as being more
687    more low-level than matrices. This is kind of cheating but, cause sometimes
688    we think of DMDA has higher level than matrices.
689 
690      We could switch based on Atype (or mtype), but we do not since the
691    specialized setting routines depend only on the particular preallocation
692    details of the matrix, not the type itself.
693   */
694   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJSetPreallocation_C", &aij));
695   if (!aij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJSetPreallocation_C", &aij));
696   if (!aij) {
697     PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIBAIJSetPreallocation_C", &baij));
698     if (!baij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", &baij));
699     if (!baij) {
700       PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISBAIJSetPreallocation_C", &sbaij));
701       if (!sbaij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSBAIJSetPreallocation_C", &sbaij));
702       if (!sbaij) {
703         PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", &sell));
704         if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSELLSetPreallocation_C", &sell));
705       }
706       if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatISSetPreallocation_C", &is));
707     }
708   }
709   if (aij) {
710     if (dim == 1) {
711       if (dd->ofill) {
712         PetscCall(DMCreateMatrix_DA_1d_MPIAIJ_Fill(da, A));
713       } else {
714         DMBoundaryType bx;
715         PetscMPIInt    size;
716         PetscCall(DMDAGetInfo(da, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &bx, NULL, NULL, NULL));
717         PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));
718         if (size == 1 && bx == DM_BOUNDARY_NONE) {
719           PetscCall(DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da, A));
720         } else {
721           PetscCall(DMCreateMatrix_DA_1d_MPIAIJ(da, A));
722         }
723       }
724     } else if (dim == 2) {
725       if (dd->ofill) {
726         PetscCall(DMCreateMatrix_DA_2d_MPIAIJ_Fill(da, A));
727       } else {
728         PetscCall(DMCreateMatrix_DA_2d_MPIAIJ(da, A));
729       }
730     } else if (dim == 3) {
731       if (dd->ofill) {
732         PetscCall(DMCreateMatrix_DA_3d_MPIAIJ_Fill(da, A));
733       } else {
734         PetscCall(DMCreateMatrix_DA_3d_MPIAIJ(da, A));
735       }
736     }
737   } else if (baij) {
738     if (dim == 2) {
739       PetscCall(DMCreateMatrix_DA_2d_MPIBAIJ(da, A));
740     } else if (dim == 3) {
741       PetscCall(DMCreateMatrix_DA_3d_MPIBAIJ(da, A));
742     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
743   } else if (sbaij) {
744     if (dim == 2) {
745       PetscCall(DMCreateMatrix_DA_2d_MPISBAIJ(da, A));
746     } else if (dim == 3) {
747       PetscCall(DMCreateMatrix_DA_3d_MPISBAIJ(da, A));
748     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
749   } else if (sell) {
750     if (dim == 2) {
751       PetscCall(DMCreateMatrix_DA_2d_MPISELL(da, A));
752     } else if (dim == 3) {
753       PetscCall(DMCreateMatrix_DA_3d_MPISELL(da, A));
754     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
755   } else if (is) {
756     PetscCall(DMCreateMatrix_DA_IS(da, A));
757   } else {
758     ISLocalToGlobalMapping ltog;
759 
760     PetscCall(MatSetBlockSize(A, dof));
761     PetscCall(MatSetUp(A));
762     PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
763     PetscCall(MatSetLocalToGlobalMapping(A, ltog, ltog));
764   }
765   PetscCall(DMDAGetGhostCorners(da, &starts[0], &starts[1], &starts[2], &dims[0], &dims[1], &dims[2]));
766   PetscCall(MatSetStencil(A, dim, dims, starts, dof));
767   PetscCall(MatSetDM(A, da));
768   PetscCallMPI(MPI_Comm_size(comm, &size));
769   if (size > 1) {
770     /* change viewer to display matrix in natural ordering */
771     PetscCall(MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA));
772     PetscCall(MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA));
773   }
774   *J = A;
775   PetscFunctionReturn(PETSC_SUCCESS);
776 }
777 
778 PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]);
779 
780 PetscErrorCode DMCreateMatrix_DA_IS(DM dm, Mat J)
781 {
782   DM_DA                 *da = (DM_DA *)dm->data;
783   Mat                    lJ, P;
784   ISLocalToGlobalMapping ltog;
785   IS                     is;
786   PetscBT                bt;
787   const PetscInt        *e_loc, *idx;
788   PetscInt               i, nel, nen, nv, dof, *gidx, n, N;
789 
790   /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
791      We need to filter out the local indices that are not represented through the DMDAGetElements decomposition */
792   PetscFunctionBegin;
793   dof = da->w;
794   PetscCall(MatSetBlockSize(J, dof));
795   PetscCall(DMGetLocalToGlobalMapping(dm, &ltog));
796 
797   /* flag local elements indices in local DMDA numbering */
798   PetscCall(ISLocalToGlobalMappingGetSize(ltog, &nv));
799   PetscCall(PetscBTCreate(nv / dof, &bt));
800   PetscCall(DMDAGetElements(dm, &nel, &nen, &e_loc)); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
801   for (i = 0; i < nel * nen; i++) PetscCall(PetscBTSet(bt, e_loc[i]));
802 
803   /* filter out (set to -1) the global indices not used by the local elements */
804   PetscCall(PetscMalloc1(nv / dof, &gidx));
805   PetscCall(ISLocalToGlobalMappingGetBlockIndices(ltog, &idx));
806   PetscCall(PetscArraycpy(gidx, idx, nv / dof));
807   PetscCall(ISLocalToGlobalMappingRestoreBlockIndices(ltog, &idx));
808   for (i = 0; i < nv / dof; i++)
809     if (!PetscBTLookup(bt, i)) gidx[i] = -1;
810   PetscCall(PetscBTDestroy(&bt));
811   PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)dm), dof, nv / dof, gidx, PETSC_OWN_POINTER, &is));
812   PetscCall(ISLocalToGlobalMappingCreateIS(is, &ltog));
813   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
814   PetscCall(ISLocalToGlobalMappingDestroy(&ltog));
815   PetscCall(ISDestroy(&is));
816 
817   /* Preallocation */
818   if (dm->prealloc_skip) {
819     PetscCall(MatSetUp(J));
820   } else {
821     PetscCall(MatISGetLocalMat(J, &lJ));
822     PetscCall(MatGetLocalToGlobalMapping(lJ, &ltog, NULL));
823     PetscCall(MatCreate(PetscObjectComm((PetscObject)lJ), &P));
824     PetscCall(MatSetType(P, MATPREALLOCATOR));
825     PetscCall(MatSetLocalToGlobalMapping(P, ltog, ltog));
826     PetscCall(MatGetSize(lJ, &N, NULL));
827     PetscCall(MatGetLocalSize(lJ, &n, NULL));
828     PetscCall(MatSetSizes(P, n, n, N, N));
829     PetscCall(MatSetBlockSize(P, dof));
830     PetscCall(MatSetUp(P));
831     for (i = 0; i < nel; i++) PetscCall(MatSetValuesBlockedLocal(P, nen, e_loc + i * nen, nen, e_loc + i * nen, NULL, INSERT_VALUES));
832     PetscCall(MatPreallocatorPreallocate(P, (PetscBool)!da->prealloc_only, lJ));
833     PetscCall(MatISRestoreLocalMat(J, &lJ));
834     PetscCall(DMDARestoreElements(dm, &nel, &nen, &e_loc));
835     PetscCall(MatDestroy(&P));
836 
837     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
838     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
839   }
840   PetscFunctionReturn(PETSC_SUCCESS);
841 }
842 
843 PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da, Mat J)
844 {
845   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p;
846   PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
847   MPI_Comm               comm;
848   PetscScalar           *values;
849   DMBoundaryType         bx, by;
850   ISLocalToGlobalMapping ltog;
851   DMDAStencilType        st;
852 
853   PetscFunctionBegin;
854   /*
855          nc - number of components per grid point
856          col - number of colors needed in one direction for single component problem
857   */
858   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
859   col = 2 * s + 1;
860   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
861   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
862   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
863 
864   PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
865   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
866 
867   PetscCall(MatSetBlockSize(J, nc));
868   /* determine the matrix preallocation information */
869   MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
870   for (i = xs; i < xs + nx; i++) {
871     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
872     pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
873 
874     for (j = ys; j < ys + ny; j++) {
875       slot = i - gxs + gnx * (j - gys);
876 
877       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
878       lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
879 
880       cnt = 0;
881       for (k = 0; k < nc; k++) {
882         for (l = lstart; l < lend + 1; l++) {
883           for (p = pstart; p < pend + 1; p++) {
884             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
885               cols[cnt++] = k + nc * (slot + gnx * l + p);
886             }
887           }
888         }
889         rows[k] = k + nc * (slot);
890       }
891       PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
892     }
893   }
894   PetscCall(MatSetBlockSize(J, nc));
895   PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
896   PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
897   MatPreallocateEnd(dnz, onz);
898 
899   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
900 
901   /*
902     For each node in the grid: we get the neighbors in the local (on processor ordering
903     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
904     PETSc ordering.
905   */
906   if (!da->prealloc_only) {
907     PetscCall(PetscCalloc1(col * col * nc * nc, &values));
908     for (i = xs; i < xs + nx; i++) {
909       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
910       pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
911 
912       for (j = ys; j < ys + ny; j++) {
913         slot = i - gxs + gnx * (j - gys);
914 
915         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
916         lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
917 
918         cnt = 0;
919         for (k = 0; k < nc; k++) {
920           for (l = lstart; l < lend + 1; l++) {
921             for (p = pstart; p < pend + 1; p++) {
922               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
923                 cols[cnt++] = k + nc * (slot + gnx * l + p);
924               }
925             }
926           }
927           rows[k] = k + nc * (slot);
928         }
929         PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
930       }
931     }
932     PetscCall(PetscFree(values));
933     /* do not copy values to GPU since they are all zero and not yet needed there */
934     PetscCall(MatBindToCPU(J, PETSC_TRUE));
935     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
936     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
937     PetscCall(MatBindToCPU(J, PETSC_FALSE));
938     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
939   }
940   PetscCall(PetscFree2(rows, cols));
941   PetscFunctionReturn(PETSC_SUCCESS);
942 }
943 
944 PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da, Mat J)
945 {
946   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
947   PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
948   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
949   MPI_Comm               comm;
950   PetscScalar           *values;
951   DMBoundaryType         bx, by, bz;
952   ISLocalToGlobalMapping ltog;
953   DMDAStencilType        st;
954 
955   PetscFunctionBegin;
956   /*
957          nc - number of components per grid point
958          col - number of colors needed in one direction for single component problem
959   */
960   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
961   col = 2 * s + 1;
962   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
963   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
964   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
965 
966   PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
967   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
968 
969   PetscCall(MatSetBlockSize(J, nc));
970   /* determine the matrix preallocation information */
971   MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
972   for (i = xs; i < xs + nx; i++) {
973     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
974     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
975     for (j = ys; j < ys + ny; j++) {
976       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
977       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
978       for (k = zs; k < zs + nz; k++) {
979         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
980         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
981 
982         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
983 
984         cnt = 0;
985         for (l = 0; l < nc; l++) {
986           for (ii = istart; ii < iend + 1; ii++) {
987             for (jj = jstart; jj < jend + 1; jj++) {
988               for (kk = kstart; kk < kend + 1; kk++) {
989                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
990                   cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
991                 }
992               }
993             }
994           }
995           rows[l] = l + nc * (slot);
996         }
997         PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
998       }
999     }
1000   }
1001   PetscCall(MatSetBlockSize(J, nc));
1002   PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
1003   PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
1004   MatPreallocateEnd(dnz, onz);
1005   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1006 
1007   /*
1008     For each node in the grid: we get the neighbors in the local (on processor ordering
1009     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1010     PETSc ordering.
1011   */
1012   if (!da->prealloc_only) {
1013     PetscCall(PetscCalloc1(col * col * col * nc * nc * nc, &values));
1014     for (i = xs; i < xs + nx; i++) {
1015       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1016       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1017       for (j = ys; j < ys + ny; j++) {
1018         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1019         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1020         for (k = zs; k < zs + nz; k++) {
1021           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1022           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1023 
1024           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1025 
1026           cnt = 0;
1027           for (l = 0; l < nc; l++) {
1028             for (ii = istart; ii < iend + 1; ii++) {
1029               for (jj = jstart; jj < jend + 1; jj++) {
1030                 for (kk = kstart; kk < kend + 1; kk++) {
1031                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1032                     cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
1033                   }
1034                 }
1035               }
1036             }
1037             rows[l] = l + nc * (slot);
1038           }
1039           PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
1040         }
1041       }
1042     }
1043     PetscCall(PetscFree(values));
1044     /* do not copy values to GPU since they are all zero and not yet needed there */
1045     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1046     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1047     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1048     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1049     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1050   }
1051   PetscCall(PetscFree2(rows, cols));
1052   PetscFunctionReturn(PETSC_SUCCESS);
1053 }
1054 
1055 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da, Mat J)
1056 {
1057   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, M, N;
1058   PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
1059   MPI_Comm               comm;
1060   DMBoundaryType         bx, by;
1061   ISLocalToGlobalMapping ltog, mltog;
1062   DMDAStencilType        st;
1063   PetscBool              removedups = PETSC_FALSE, alreadyboundtocpu = PETSC_TRUE;
1064 
1065   PetscFunctionBegin;
1066   /*
1067          nc - number of components per grid point
1068          col - number of colors needed in one direction for single component problem
1069   */
1070   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
1071   if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1072   col = 2 * s + 1;
1073   /*
1074        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1075        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1076   */
1077   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1078   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1079   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1080   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1081   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1082 
1083   PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
1084   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1085 
1086   PetscCall(MatSetBlockSize(J, nc));
1087   /* determine the matrix preallocation information */
1088   MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
1089   for (i = xs; i < xs + nx; i++) {
1090     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1091     pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1092 
1093     for (j = ys; j < ys + ny; j++) {
1094       slot = i - gxs + gnx * (j - gys);
1095 
1096       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1097       lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1098 
1099       cnt = 0;
1100       for (k = 0; k < nc; k++) {
1101         for (l = lstart; l < lend + 1; l++) {
1102           for (p = pstart; p < pend + 1; p++) {
1103             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
1104               cols[cnt++] = k + nc * (slot + gnx * l + p);
1105             }
1106           }
1107         }
1108         rows[k] = k + nc * (slot);
1109       }
1110       if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1111       else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1112     }
1113   }
1114   PetscCall(MatSetBlockSize(J, nc));
1115   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
1116   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
1117   MatPreallocateEnd(dnz, onz);
1118   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1119   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1120 
1121   /*
1122     For each node in the grid: we get the neighbors in the local (on processor ordering
1123     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1124     PETSc ordering.
1125   */
1126   if (!da->prealloc_only) {
1127     for (i = xs; i < xs + nx; i++) {
1128       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1129       pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1130 
1131       for (j = ys; j < ys + ny; j++) {
1132         slot = i - gxs + gnx * (j - gys);
1133 
1134         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1135         lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1136 
1137         cnt = 0;
1138         for (l = lstart; l < lend + 1; l++) {
1139           for (p = pstart; p < pend + 1; p++) {
1140             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
1141               cols[cnt++] = nc * (slot + gnx * l + p);
1142               for (k = 1; k < nc; k++) {
1143                 cols[cnt] = 1 + cols[cnt - 1];
1144                 cnt++;
1145               }
1146             }
1147           }
1148         }
1149         for (k = 0; k < nc; k++) rows[k] = k + nc * (slot);
1150         PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1151       }
1152     }
1153     /* do not copy values to GPU since they are all zero and not yet needed there */
1154     PetscCall(MatBoundToCPU(J, &alreadyboundtocpu));
1155     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1156     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1157     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1158     if (!alreadyboundtocpu) PetscCall(MatBindToCPU(J, PETSC_FALSE));
1159     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1160     if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1161   }
1162   PetscCall(PetscFree2(rows, cols));
1163   PetscFunctionReturn(PETSC_SUCCESS);
1164 }
1165 
1166 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da, Mat J)
1167 {
1168   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1169   PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, M, N;
1170   PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
1171   DM_DA                 *dd = (DM_DA *)da->data;
1172   PetscInt               ifill_col, *ofill = dd->ofill, *dfill = dd->dfill;
1173   MPI_Comm               comm;
1174   DMBoundaryType         bx, by;
1175   ISLocalToGlobalMapping ltog;
1176   DMDAStencilType        st;
1177   PetscBool              removedups = PETSC_FALSE;
1178 
1179   PetscFunctionBegin;
1180   /*
1181          nc - number of components per grid point
1182          col - number of colors needed in one direction for single component problem
1183   */
1184   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
1185   col = 2 * s + 1;
1186   /*
1187        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1188        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1189   */
1190   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1191   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1192   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1193   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1194   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1195 
1196   PetscCall(PetscMalloc1(col * col * nc, &cols));
1197   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1198 
1199   PetscCall(MatSetBlockSize(J, nc));
1200   /* determine the matrix preallocation information */
1201   MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
1202   for (i = xs; i < xs + nx; i++) {
1203     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1204     pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1205 
1206     for (j = ys; j < ys + ny; j++) {
1207       slot = i - gxs + gnx * (j - gys);
1208 
1209       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1210       lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1211 
1212       for (k = 0; k < nc; k++) {
1213         cnt = 0;
1214         for (l = lstart; l < lend + 1; l++) {
1215           for (p = pstart; p < pend + 1; p++) {
1216             if (l || p) {
1217               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
1218                 for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
1219               }
1220             } else {
1221               if (dfill) {
1222                 for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
1223               } else {
1224                 for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
1225               }
1226             }
1227           }
1228         }
1229         row    = k + nc * (slot);
1230         maxcnt = PetscMax(maxcnt, cnt);
1231         if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
1232         else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
1233       }
1234     }
1235   }
1236   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
1237   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
1238   MatPreallocateEnd(dnz, onz);
1239   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1240 
1241   /*
1242     For each node in the grid: we get the neighbors in the local (on processor ordering
1243     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1244     PETSc ordering.
1245   */
1246   if (!da->prealloc_only) {
1247     for (i = xs; i < xs + nx; i++) {
1248       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1249       pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1250 
1251       for (j = ys; j < ys + ny; j++) {
1252         slot = i - gxs + gnx * (j - gys);
1253 
1254         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1255         lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1256 
1257         for (k = 0; k < nc; k++) {
1258           cnt = 0;
1259           for (l = lstart; l < lend + 1; l++) {
1260             for (p = pstart; p < pend + 1; p++) {
1261               if (l || p) {
1262                 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
1263                   for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
1264                 }
1265               } else {
1266                 if (dfill) {
1267                   for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
1268                 } else {
1269                   for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
1270                 }
1271               }
1272             }
1273           }
1274           row = k + nc * (slot);
1275           PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1276         }
1277       }
1278     }
1279     /* do not copy values to GPU since they are all zero and not yet needed there */
1280     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1281     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1282     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1283     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1284     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1285   }
1286   PetscCall(PetscFree(cols));
1287   PetscFunctionReturn(PETSC_SUCCESS);
1288 }
1289 
1290 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da, Mat J)
1291 {
1292   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1293   PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
1294   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
1295   MPI_Comm               comm;
1296   DMBoundaryType         bx, by, bz;
1297   ISLocalToGlobalMapping ltog, mltog;
1298   DMDAStencilType        st;
1299   PetscBool              removedups = PETSC_FALSE;
1300 
1301   PetscFunctionBegin;
1302   /*
1303          nc - number of components per grid point
1304          col - number of colors needed in one direction for single component problem
1305   */
1306   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
1307   if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1308   col = 2 * s + 1;
1309 
1310   /*
1311        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1312        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1313   */
1314   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1315   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1316   if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;
1317 
1318   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
1319   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
1320   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1321 
1322   PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
1323   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1324 
1325   PetscCall(MatSetBlockSize(J, nc));
1326   /* determine the matrix preallocation information */
1327   MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
1328   for (i = xs; i < xs + nx; i++) {
1329     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1330     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1331     for (j = ys; j < ys + ny; j++) {
1332       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1333       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1334       for (k = zs; k < zs + nz; k++) {
1335         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1336         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1337 
1338         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1339 
1340         cnt = 0;
1341         for (l = 0; l < nc; l++) {
1342           for (ii = istart; ii < iend + 1; ii++) {
1343             for (jj = jstart; jj < jend + 1; jj++) {
1344               for (kk = kstart; kk < kend + 1; kk++) {
1345                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1346                   cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
1347                 }
1348               }
1349             }
1350           }
1351           rows[l] = l + nc * (slot);
1352         }
1353         if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1354         else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1355       }
1356     }
1357   }
1358   PetscCall(MatSetBlockSize(J, nc));
1359   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
1360   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
1361   MatPreallocateEnd(dnz, onz);
1362   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1363   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1364 
1365   /*
1366     For each node in the grid: we get the neighbors in the local (on processor ordering
1367     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1368     PETSc ordering.
1369   */
1370   if (!da->prealloc_only) {
1371     for (i = xs; i < xs + nx; i++) {
1372       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1373       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1374       for (j = ys; j < ys + ny; j++) {
1375         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1376         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1377         for (k = zs; k < zs + nz; k++) {
1378           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1379           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1380 
1381           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1382 
1383           cnt = 0;
1384           for (kk = kstart; kk < kend + 1; kk++) {
1385             for (jj = jstart; jj < jend + 1; jj++) {
1386               for (ii = istart; ii < iend + 1; ii++) {
1387                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1388                   cols[cnt++] = nc * (slot + ii + gnx * jj + gnx * gny * kk);
1389                   for (l = 1; l < nc; l++) {
1390                     cols[cnt] = 1 + cols[cnt - 1];
1391                     cnt++;
1392                   }
1393                 }
1394               }
1395             }
1396           }
1397           rows[0] = nc * (slot);
1398           for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1399           PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1400         }
1401       }
1402     }
1403     /* do not copy values to GPU since they are all zero and not yet needed there */
1404     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1405     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1406     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1407     if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1408     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1409     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1410   }
1411   PetscCall(PetscFree2(rows, cols));
1412   PetscFunctionReturn(PETSC_SUCCESS);
1413 }
1414 
1415 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da, Mat J)
1416 {
1417   DM_DA                 *dd = (DM_DA *)da->data;
1418   PetscInt               xs, nx, i, j, gxs, gnx, row, k, l;
1419   PetscInt               m, dim, s, *cols = NULL, nc, cnt, maxcnt = 0, *ocols;
1420   PetscInt              *ofill = dd->ofill, *dfill = dd->dfill;
1421   DMBoundaryType         bx;
1422   ISLocalToGlobalMapping ltog;
1423   PetscMPIInt            rank, size;
1424 
1425   PetscFunctionBegin;
1426   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)da), &rank));
1427   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));
1428 
1429   /*
1430          nc - number of components per grid point
1431   */
1432   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1433   PetscCheck(s <= 1, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Matrix creation for 1d not implemented correctly for stencil width larger than 1");
1434   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1435   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1436 
1437   PetscCall(MatSetBlockSize(J, nc));
1438   PetscCall(PetscCalloc2(nx * nc, &cols, nx * nc, &ocols));
1439 
1440   /*
1441         note should be smaller for first and last process with no periodic
1442         does not handle dfill
1443   */
1444   cnt = 0;
1445   /* coupling with process to the left */
1446   for (i = 0; i < s; i++) {
1447     for (j = 0; j < nc; j++) {
1448       ocols[cnt] = ((rank == 0) ? 0 : (s - i) * (ofill[j + 1] - ofill[j]));
1449       cols[cnt]  = dfill[j + 1] - dfill[j] + (s + i) * (ofill[j + 1] - ofill[j]);
1450       if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1451         if (size > 1) ocols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
1452         else cols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
1453       }
1454       maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1455       cnt++;
1456     }
1457   }
1458   for (i = s; i < nx - s; i++) {
1459     for (j = 0; j < nc; j++) {
1460       cols[cnt] = dfill[j + 1] - dfill[j] + 2 * s * (ofill[j + 1] - ofill[j]);
1461       maxcnt    = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1462       cnt++;
1463     }
1464   }
1465   /* coupling with process to the right */
1466   for (i = nx - s; i < nx; i++) {
1467     for (j = 0; j < nc; j++) {
1468       ocols[cnt] = ((rank == (size - 1)) ? 0 : (i - nx + s + 1) * (ofill[j + 1] - ofill[j]));
1469       cols[cnt]  = dfill[j + 1] - dfill[j] + (s + nx - i - 1) * (ofill[j + 1] - ofill[j]);
1470       if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1471         if (size > 1) ocols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
1472         else cols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
1473       }
1474       maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1475       cnt++;
1476     }
1477   }
1478 
1479   PetscCall(MatSeqAIJSetPreallocation(J, 0, cols));
1480   PetscCall(MatMPIAIJSetPreallocation(J, 0, cols, 0, ocols));
1481   PetscCall(PetscFree2(cols, ocols));
1482 
1483   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1484   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1485 
1486   /*
1487     For each node in the grid: we get the neighbors in the local (on processor ordering
1488     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1489     PETSc ordering.
1490   */
1491   if (!da->prealloc_only) {
1492     PetscCall(PetscMalloc1(maxcnt, &cols));
1493     row = xs * nc;
1494     /* coupling with process to the left */
1495     for (i = xs; i < xs + s; i++) {
1496       for (j = 0; j < nc; j++) {
1497         cnt = 0;
1498         if (rank) {
1499           for (l = 0; l < s; l++) {
1500             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1501           }
1502         }
1503         if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1504           for (l = 0; l < s; l++) {
1505             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (m + i - s - l) * nc + ofill[k];
1506           }
1507         }
1508         if (dfill) {
1509           for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1510         } else {
1511           for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1512         }
1513         for (l = 0; l < s; l++) {
1514           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1515         }
1516         PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1517         row++;
1518       }
1519     }
1520     for (i = xs + s; i < xs + nx - s; i++) {
1521       for (j = 0; j < nc; j++) {
1522         cnt = 0;
1523         for (l = 0; l < s; l++) {
1524           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1525         }
1526         if (dfill) {
1527           for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1528         } else {
1529           for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1530         }
1531         for (l = 0; l < s; l++) {
1532           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1533         }
1534         PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1535         row++;
1536       }
1537     }
1538     /* coupling with process to the right */
1539     for (i = xs + nx - s; i < xs + nx; i++) {
1540       for (j = 0; j < nc; j++) {
1541         cnt = 0;
1542         for (l = 0; l < s; l++) {
1543           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1544         }
1545         if (dfill) {
1546           for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1547         } else {
1548           for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1549         }
1550         if (rank < size - 1) {
1551           for (l = 0; l < s; l++) {
1552             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1553           }
1554         }
1555         if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1556           for (l = 0; l < s; l++) {
1557             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s - l - m + 2) * nc + ofill[k];
1558           }
1559         }
1560         PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1561         row++;
1562       }
1563     }
1564     PetscCall(PetscFree(cols));
1565     /* do not copy values to GPU since they are all zero and not yet needed there */
1566     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1567     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1568     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1569     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1570     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1571   }
1572   PetscFunctionReturn(PETSC_SUCCESS);
1573 }
1574 
1575 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da, Mat J)
1576 {
1577   PetscInt               xs, nx, i, i1, slot, gxs, gnx;
1578   PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1579   PetscInt               istart, iend;
1580   DMBoundaryType         bx;
1581   ISLocalToGlobalMapping ltog, mltog;
1582 
1583   PetscFunctionBegin;
1584   /*
1585          nc - number of components per grid point
1586          col - number of colors needed in one direction for single component problem
1587   */
1588   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1589   if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1590   col = 2 * s + 1;
1591 
1592   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1593   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1594 
1595   PetscCall(MatSetBlockSize(J, nc));
1596   PetscCall(MatSeqAIJSetPreallocation(J, col * nc, NULL));
1597   PetscCall(MatMPIAIJSetPreallocation(J, col * nc, NULL, col * nc, NULL));
1598 
1599   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1600   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1601   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1602 
1603   /*
1604     For each node in the grid: we get the neighbors in the local (on processor ordering
1605     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1606     PETSc ordering.
1607   */
1608   if (!da->prealloc_only) {
1609     PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
1610     for (i = xs; i < xs + nx; i++) {
1611       istart = PetscMax(-s, gxs - i);
1612       iend   = PetscMin(s, gxs + gnx - i - 1);
1613       slot   = i - gxs;
1614 
1615       cnt = 0;
1616       for (i1 = istart; i1 < iend + 1; i1++) {
1617         cols[cnt++] = nc * (slot + i1);
1618         for (l = 1; l < nc; l++) {
1619           cols[cnt] = 1 + cols[cnt - 1];
1620           cnt++;
1621         }
1622       }
1623       rows[0] = nc * (slot);
1624       for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1625       PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1626     }
1627     /* do not copy values to GPU since they are all zero and not yet needed there */
1628     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1629     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1630     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1631     if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1632     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1633     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1634     PetscCall(PetscFree2(rows, cols));
1635   }
1636   PetscFunctionReturn(PETSC_SUCCESS);
1637 }
1638 
1639 PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da, Mat J)
1640 {
1641   PetscInt               xs, nx, i, i1, slot, gxs, gnx;
1642   PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1643   PetscInt               istart, iend;
1644   DMBoundaryType         bx;
1645   ISLocalToGlobalMapping ltog, mltog;
1646 
1647   PetscFunctionBegin;
1648   /*
1649          nc - number of components per grid point
1650          col - number of colors needed in one direction for single component problem
1651   */
1652   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1653   col = 2 * s + 1;
1654 
1655   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1656   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1657 
1658   PetscCall(MatSetBlockSize(J, nc));
1659   PetscCall(MatSeqAIJSetTotalPreallocation(J, nx * nc * col * nc));
1660 
1661   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1662   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1663   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1664 
1665   /*
1666     For each node in the grid: we get the neighbors in the local (on processor ordering
1667     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1668     PETSc ordering.
1669   */
1670   if (!da->prealloc_only) {
1671     PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
1672     for (i = xs; i < xs + nx; i++) {
1673       istart = PetscMax(-s, gxs - i);
1674       iend   = PetscMin(s, gxs + gnx - i - 1);
1675       slot   = i - gxs;
1676 
1677       cnt = 0;
1678       for (i1 = istart; i1 < iend + 1; i1++) {
1679         cols[cnt++] = nc * (slot + i1);
1680         for (l = 1; l < nc; l++) {
1681           cols[cnt] = 1 + cols[cnt - 1];
1682           cnt++;
1683         }
1684       }
1685       rows[0] = nc * (slot);
1686       for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1687       PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1688     }
1689     /* do not copy values to GPU since they are all zero and not yet needed there */
1690     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1691     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1692     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1693     if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1694     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1695     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1696     PetscCall(PetscFree2(rows, cols));
1697   }
1698   PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1699   PetscFunctionReturn(PETSC_SUCCESS);
1700 }
1701 
1702 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da, Mat J)
1703 {
1704   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1705   PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
1706   PetscInt               istart, iend, jstart, jend, ii, jj;
1707   MPI_Comm               comm;
1708   PetscScalar           *values;
1709   DMBoundaryType         bx, by;
1710   DMDAStencilType        st;
1711   ISLocalToGlobalMapping ltog;
1712 
1713   PetscFunctionBegin;
1714   /*
1715      nc - number of components per grid point
1716      col - number of colors needed in one direction for single component problem
1717   */
1718   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
1719   col = 2 * s + 1;
1720 
1721   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1722   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1723   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1724 
1725   PetscCall(PetscMalloc1(col * col * nc * nc, &cols));
1726 
1727   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1728 
1729   /* determine the matrix preallocation information */
1730   MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
1731   for (i = xs; i < xs + nx; i++) {
1732     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1733     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1734     for (j = ys; j < ys + ny; j++) {
1735       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1736       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1737       slot   = i - gxs + gnx * (j - gys);
1738 
1739       /* Find block columns in block row */
1740       cnt = 0;
1741       for (ii = istart; ii < iend + 1; ii++) {
1742         for (jj = jstart; jj < jend + 1; jj++) {
1743           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1744             cols[cnt++] = slot + ii + gnx * jj;
1745           }
1746         }
1747       }
1748       PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
1749     }
1750   }
1751   PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
1752   PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1753   MatPreallocateEnd(dnz, onz);
1754 
1755   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1756 
1757   /*
1758     For each node in the grid: we get the neighbors in the local (on processor ordering
1759     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1760     PETSc ordering.
1761   */
1762   if (!da->prealloc_only) {
1763     PetscCall(PetscCalloc1(col * col * nc * nc, &values));
1764     for (i = xs; i < xs + nx; i++) {
1765       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1766       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1767       for (j = ys; j < ys + ny; j++) {
1768         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1769         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1770         slot   = i - gxs + gnx * (j - gys);
1771         cnt    = 0;
1772         for (ii = istart; ii < iend + 1; ii++) {
1773           for (jj = jstart; jj < jend + 1; jj++) {
1774             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1775               cols[cnt++] = slot + ii + gnx * jj;
1776             }
1777           }
1778         }
1779         PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
1780       }
1781     }
1782     PetscCall(PetscFree(values));
1783     /* do not copy values to GPU since they are all zero and not yet needed there */
1784     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1785     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1786     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1787     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1788     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1789   }
1790   PetscCall(PetscFree(cols));
1791   PetscFunctionReturn(PETSC_SUCCESS);
1792 }
1793 
1794 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da, Mat J)
1795 {
1796   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1797   PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
1798   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
1799   MPI_Comm               comm;
1800   PetscScalar           *values;
1801   DMBoundaryType         bx, by, bz;
1802   DMDAStencilType        st;
1803   ISLocalToGlobalMapping ltog;
1804 
1805   PetscFunctionBegin;
1806   /*
1807          nc - number of components per grid point
1808          col - number of colors needed in one direction for single component problem
1809   */
1810   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
1811   col = 2 * s + 1;
1812 
1813   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
1814   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
1815   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1816 
1817   PetscCall(PetscMalloc1(col * col * col, &cols));
1818 
1819   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1820 
1821   /* determine the matrix preallocation information */
1822   MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
1823   for (i = xs; i < xs + nx; i++) {
1824     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1825     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1826     for (j = ys; j < ys + ny; j++) {
1827       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1828       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1829       for (k = zs; k < zs + nz; k++) {
1830         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1831         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1832 
1833         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1834 
1835         /* Find block columns in block row */
1836         cnt = 0;
1837         for (ii = istart; ii < iend + 1; ii++) {
1838           for (jj = jstart; jj < jend + 1; jj++) {
1839             for (kk = kstart; kk < kend + 1; kk++) {
1840               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1841                 cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
1842               }
1843             }
1844           }
1845         }
1846         PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
1847       }
1848     }
1849   }
1850   PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
1851   PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1852   MatPreallocateEnd(dnz, onz);
1853 
1854   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1855 
1856   /*
1857     For each node in the grid: we get the neighbors in the local (on processor ordering
1858     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1859     PETSc ordering.
1860   */
1861   if (!da->prealloc_only) {
1862     PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
1863     for (i = xs; i < xs + nx; i++) {
1864       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1865       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1866       for (j = ys; j < ys + ny; j++) {
1867         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1868         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1869         for (k = zs; k < zs + nz; k++) {
1870           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1871           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1872 
1873           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1874 
1875           cnt = 0;
1876           for (ii = istart; ii < iend + 1; ii++) {
1877             for (jj = jstart; jj < jend + 1; jj++) {
1878               for (kk = kstart; kk < kend + 1; kk++) {
1879                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1880                   cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
1881                 }
1882               }
1883             }
1884           }
1885           PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
1886         }
1887       }
1888     }
1889     PetscCall(PetscFree(values));
1890     /* do not copy values to GPU since they are all zero and not yet needed there */
1891     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1892     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1893     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1894     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1895     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1896   }
1897   PetscCall(PetscFree(cols));
1898   PetscFunctionReturn(PETSC_SUCCESS);
1899 }
1900 
1901 /*
1902   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
1903   identify in the local ordering with periodic domain.
1904 */
1905 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog, PetscInt *row, PetscInt *cnt, PetscInt col[])
1906 {
1907   PetscInt i, n;
1908 
1909   PetscFunctionBegin;
1910   PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, 1, row, row));
1911   PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, *cnt, col, col));
1912   for (i = 0, n = 0; i < *cnt; i++) {
1913     if (col[i] >= *row) col[n++] = col[i];
1914   }
1915   *cnt = n;
1916   PetscFunctionReturn(PETSC_SUCCESS);
1917 }
1918 
1919 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da, Mat J)
1920 {
1921   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1922   PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
1923   PetscInt               istart, iend, jstart, jend, ii, jj;
1924   MPI_Comm               comm;
1925   PetscScalar           *values;
1926   DMBoundaryType         bx, by;
1927   DMDAStencilType        st;
1928   ISLocalToGlobalMapping ltog;
1929 
1930   PetscFunctionBegin;
1931   /*
1932      nc - number of components per grid point
1933      col - number of colors needed in one direction for single component problem
1934   */
1935   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
1936   col = 2 * s + 1;
1937 
1938   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1939   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1940   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1941 
1942   PetscCall(PetscMalloc1(col * col * nc * nc, &cols));
1943 
1944   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1945 
1946   /* determine the matrix preallocation information */
1947   MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
1948   for (i = xs; i < xs + nx; i++) {
1949     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1950     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1951     for (j = ys; j < ys + ny; j++) {
1952       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1953       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1954       slot   = i - gxs + gnx * (j - gys);
1955 
1956       /* Find block columns in block row */
1957       cnt = 0;
1958       for (ii = istart; ii < iend + 1; ii++) {
1959         for (jj = jstart; jj < jend + 1; jj++) {
1960           if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
1961         }
1962       }
1963       PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
1964       PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
1965     }
1966   }
1967   PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
1968   PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1969   MatPreallocateEnd(dnz, onz);
1970 
1971   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1972 
1973   /*
1974     For each node in the grid: we get the neighbors in the local (on processor ordering
1975     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1976     PETSc ordering.
1977   */
1978   if (!da->prealloc_only) {
1979     PetscCall(PetscCalloc1(col * col * nc * nc, &values));
1980     for (i = xs; i < xs + nx; i++) {
1981       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1982       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1983       for (j = ys; j < ys + ny; j++) {
1984         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1985         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1986         slot   = i - gxs + gnx * (j - gys);
1987 
1988         /* Find block columns in block row */
1989         cnt = 0;
1990         for (ii = istart; ii < iend + 1; ii++) {
1991           for (jj = jstart; jj < jend + 1; jj++) {
1992             if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
1993           }
1994         }
1995         PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
1996         PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
1997       }
1998     }
1999     PetscCall(PetscFree(values));
2000     /* do not copy values to GPU since they are all zero and not yet needed there */
2001     PetscCall(MatBindToCPU(J, PETSC_TRUE));
2002     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
2003     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
2004     PetscCall(MatBindToCPU(J, PETSC_FALSE));
2005     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2006   }
2007   PetscCall(PetscFree(cols));
2008   PetscFunctionReturn(PETSC_SUCCESS);
2009 }
2010 
2011 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da, Mat J)
2012 {
2013   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
2014   PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
2015   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
2016   MPI_Comm               comm;
2017   PetscScalar           *values;
2018   DMBoundaryType         bx, by, bz;
2019   DMDAStencilType        st;
2020   ISLocalToGlobalMapping ltog;
2021 
2022   PetscFunctionBegin;
2023   /*
2024      nc - number of components per grid point
2025      col - number of colors needed in one direction for single component problem
2026   */
2027   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
2028   col = 2 * s + 1;
2029 
2030   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
2031   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
2032   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
2033 
2034   /* create the matrix */
2035   PetscCall(PetscMalloc1(col * col * col, &cols));
2036 
2037   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
2038 
2039   /* determine the matrix preallocation information */
2040   MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
2041   for (i = xs; i < xs + nx; i++) {
2042     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2043     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2044     for (j = ys; j < ys + ny; j++) {
2045       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2046       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2047       for (k = zs; k < zs + nz; k++) {
2048         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2049         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2050 
2051         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2052 
2053         /* Find block columns in block row */
2054         cnt = 0;
2055         for (ii = istart; ii < iend + 1; ii++) {
2056           for (jj = jstart; jj < jend + 1; jj++) {
2057             for (kk = kstart; kk < kend + 1; kk++) {
2058               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
2059             }
2060           }
2061         }
2062         PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
2063         PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
2064       }
2065     }
2066   }
2067   PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
2068   PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
2069   MatPreallocateEnd(dnz, onz);
2070 
2071   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
2072 
2073   /*
2074     For each node in the grid: we get the neighbors in the local (on processor ordering
2075     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2076     PETSc ordering.
2077   */
2078   if (!da->prealloc_only) {
2079     PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
2080     for (i = xs; i < xs + nx; i++) {
2081       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2082       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2083       for (j = ys; j < ys + ny; j++) {
2084         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2085         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2086         for (k = zs; k < zs + nz; k++) {
2087           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2088           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2089 
2090           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2091 
2092           cnt = 0;
2093           for (ii = istart; ii < iend + 1; ii++) {
2094             for (jj = jstart; jj < jend + 1; jj++) {
2095               for (kk = kstart; kk < kend + 1; kk++) {
2096                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
2097               }
2098             }
2099           }
2100           PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
2101           PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
2102         }
2103       }
2104     }
2105     PetscCall(PetscFree(values));
2106     /* do not copy values to GPU since they are all zero and not yet needed there */
2107     PetscCall(MatBindToCPU(J, PETSC_TRUE));
2108     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
2109     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
2110     PetscCall(MatBindToCPU(J, PETSC_FALSE));
2111     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2112   }
2113   PetscCall(PetscFree(cols));
2114   PetscFunctionReturn(PETSC_SUCCESS);
2115 }
2116 
2117 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da, Mat J)
2118 {
2119   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
2120   PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, *dnz, *onz;
2121   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
2122   DM_DA                 *dd = (DM_DA *)da->data;
2123   PetscInt               ifill_col, *dfill = dd->dfill, *ofill = dd->ofill;
2124   MPI_Comm               comm;
2125   PetscScalar           *values;
2126   DMBoundaryType         bx, by, bz;
2127   ISLocalToGlobalMapping ltog;
2128   DMDAStencilType        st;
2129   PetscBool              removedups = PETSC_FALSE;
2130 
2131   PetscFunctionBegin;
2132   /*
2133          nc - number of components per grid point
2134          col - number of colors needed in one direction for single component problem
2135   */
2136   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
2137   col = 2 * s + 1;
2138 
2139   /*
2140        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
2141        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
2142   */
2143   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
2144   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
2145   if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;
2146 
2147   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
2148   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
2149   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
2150 
2151   PetscCall(PetscMalloc1(col * col * col * nc, &cols));
2152   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
2153 
2154   /* determine the matrix preallocation information */
2155   MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
2156 
2157   PetscCall(MatSetBlockSize(J, nc));
2158   for (i = xs; i < xs + nx; i++) {
2159     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2160     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2161     for (j = ys; j < ys + ny; j++) {
2162       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2163       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2164       for (k = zs; k < zs + nz; k++) {
2165         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2166         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2167 
2168         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2169 
2170         for (l = 0; l < nc; l++) {
2171           cnt = 0;
2172           for (ii = istart; ii < iend + 1; ii++) {
2173             for (jj = jstart; jj < jend + 1; jj++) {
2174               for (kk = kstart; kk < kend + 1; kk++) {
2175                 if (ii || jj || kk) {
2176                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
2177                     for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2178                   }
2179                 } else {
2180                   if (dfill) {
2181                     for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2182                   } else {
2183                     for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2184                   }
2185                 }
2186               }
2187             }
2188           }
2189           row    = l + nc * (slot);
2190           maxcnt = PetscMax(maxcnt, cnt);
2191           if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
2192           else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
2193         }
2194       }
2195     }
2196   }
2197   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
2198   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
2199   MatPreallocateEnd(dnz, onz);
2200   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
2201 
2202   /*
2203     For each node in the grid: we get the neighbors in the local (on processor ordering
2204     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2205     PETSc ordering.
2206   */
2207   if (!da->prealloc_only) {
2208     PetscCall(PetscCalloc1(maxcnt, &values));
2209     for (i = xs; i < xs + nx; i++) {
2210       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2211       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2212       for (j = ys; j < ys + ny; j++) {
2213         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2214         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2215         for (k = zs; k < zs + nz; k++) {
2216           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2217           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2218 
2219           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2220 
2221           for (l = 0; l < nc; l++) {
2222             cnt = 0;
2223             for (ii = istart; ii < iend + 1; ii++) {
2224               for (jj = jstart; jj < jend + 1; jj++) {
2225                 for (kk = kstart; kk < kend + 1; kk++) {
2226                   if (ii || jj || kk) {
2227                     if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
2228                       for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2229                     }
2230                   } else {
2231                     if (dfill) {
2232                       for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2233                     } else {
2234                       for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2235                     }
2236                   }
2237                 }
2238               }
2239             }
2240             row = l + nc * (slot);
2241             PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, values, INSERT_VALUES));
2242           }
2243         }
2244       }
2245     }
2246     PetscCall(PetscFree(values));
2247     /* do not copy values to GPU since they are all zero and not yet needed there */
2248     PetscCall(MatBindToCPU(J, PETSC_TRUE));
2249     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
2250     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
2251     PetscCall(MatBindToCPU(J, PETSC_FALSE));
2252     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2253   }
2254   PetscCall(PetscFree(cols));
2255   PetscFunctionReturn(PETSC_SUCCESS);
2256 }
2257