xref: /petsc/src/dm/impls/da/fdda.c (revision feefa0e191a340680bb02e1467a36facdcb0b150) !
1 #include <petsc/private/dmdaimpl.h> /*I      "petscdmda.h"     I*/
2 #include <petscmat.h>
3 #include <petscbt.h>
4 
5 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM, ISColoringType, ISColoring *);
6 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM, ISColoringType, ISColoring *);
7 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM, ISColoringType, ISColoring *);
8 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM, ISColoringType, ISColoring *);
9 
/*
   Maps a ghost index i that may be negative or greater than the upper bound
  into the 0:m-1 range using periodicity.

  Note: only one period of wrap-around is handled; the input is assumed to
  satisfy -m <= i < 2*m, which suffices for stencil-width ghosting.
*/
#define SetInRange(i, m) (((i) < 0) ? (m) + (i) : (((i) >= (m)) ? (i) - (m) : (i)))
15 
16 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill, PetscInt w, PetscInt **rfill)
17 {
18   PetscInt i, j, nz, *fill;
19 
20   PetscFunctionBegin;
21   if (!dfill) PetscFunctionReturn(PETSC_SUCCESS);
22 
23   /* count number nonzeros */
24   nz = 0;
25   for (i = 0; i < w; i++) {
26     for (j = 0; j < w; j++) {
27       if (dfill[w * i + j]) nz++;
28     }
29   }
30   PetscCall(PetscMalloc1(nz + w + 1, &fill));
31   /* construct modified CSR storage of nonzero structure */
32   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
33    so fill[1] - fill[0] gives number of nonzeros in first row etc */
34   nz = w + 1;
35   for (i = 0; i < w; i++) {
36     fill[i] = nz;
37     for (j = 0; j < w; j++) {
38       if (dfill[w * i + j]) {
39         fill[nz] = j;
40         nz++;
41       }
42     }
43   }
44   fill[w] = nz;
45 
46   *rfill = fill;
47   PetscFunctionReturn(PETSC_SUCCESS);
48 }
49 
50 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse, PetscInt w, PetscInt **rfill)
51 {
52   PetscInt nz;
53 
54   PetscFunctionBegin;
55   if (!dfillsparse) PetscFunctionReturn(PETSC_SUCCESS);
56 
57   /* Determine number of non-zeros */
58   nz = (dfillsparse[w] - w - 1);
59 
60   /* Allocate space for our copy of the given sparse matrix representation. */
61   PetscCall(PetscMalloc1(nz + w + 1, rfill));
62   PetscCall(PetscArraycpy(*rfill, dfillsparse, nz + w + 1));
63   PetscFunctionReturn(PETSC_SUCCESS);
64 }
65 
66 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
67 {
68   PetscInt i, k, cnt = 1;
69 
70   PetscFunctionBegin;
71 
72   /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of
73    columns to the left with any nonzeros in them plus 1 */
74   PetscCall(PetscCalloc1(dd->w, &dd->ofillcols));
75   for (i = 0; i < dd->w; i++) {
76     for (k = dd->ofill[i]; k < dd->ofill[i + 1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
77   }
78   for (i = 0; i < dd->w; i++) {
79     if (dd->ofillcols[i]) dd->ofillcols[i] = cnt++;
80   }
81   PetscFunctionReturn(PETSC_SUCCESS);
82 }
83 
84 /*@
85   DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
86   of the matrix returned by `DMCreateMatrix()`.
87 
88   Logically Collective
89 
90   Input Parameters:
91 + da    - the distributed array
92 . dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
93 - ofill - the fill pattern in the off-diagonal blocks
94 
95   Level: developer
96 
97   Notes:
98   This only makes sense when you are doing multicomponent problems but using the
99   `MATMPIAIJ` matrix format
100 
101   The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
102   representing coupling and 0 entries for missing coupling. For example
103 .vb
104             dfill[9] = {1, 0, 0,
105                         1, 1, 0,
106                         0, 1, 1}
107 .ve
108   means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
109   itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
110   diagonal block).
111 
  `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns than
  can be represented in the dfill, ofill format
114 
115   Contributed by Glenn Hammond
116 
117 .seealso: `DM`, `DMDA`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
118 @*/
PetscErrorCode DMDASetBlockFills(DM da, const PetscInt *dfill, const PetscInt *ofill)
{
  DM_DA *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /* save the given dfill and ofill information, converted from the dense
     dof-by-dof form to the compact CSR-like form stored in dd->dfill/dd->ofill
     (a NULL pattern leaves the field unset, meaning a dense block) */
  PetscCall(DMDASetBlockFills_Private(dfill, dd->w, &dd->dfill));
  PetscCall(DMDASetBlockFills_Private(ofill, dd->w, &dd->ofill));

  /* count nonzeros in ofill columns (builds dd->ofillcols) */
  PetscCall(DMDASetBlockFills_Private2(dd));
  PetscFunctionReturn(PETSC_SUCCESS);
}
132 
133 /*@
134   DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
135   of the matrix returned by `DMCreateMatrix()`, using sparse representations
136   of fill patterns.
137 
138   Logically Collective
139 
140   Input Parameters:
141 + da          - the distributed array
142 . dfillsparse - the sparse fill pattern in the diagonal block (may be `NULL`, means use dense block)
143 - ofillsparse - the sparse fill pattern in the off-diagonal blocks
144 
145   Level: developer
146 
147   Notes:
148   This only makes sense when you are doing multicomponent problems but using the
149   `MATMPIAIJ` matrix format
150 
151   The format for `dfill` and `ofill` is a sparse representation of a
152   dof-by-dof matrix with 1 entries representing coupling and 0 entries
153   for missing coupling.  The sparse representation is a 1 dimensional
154   array of length nz + dof + 1, where nz is the number of non-zeros in
155   the matrix.  The first dof entries in the array give the
156   starting array indices of each row's items in the rest of the array,
157   the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
158   and the remaining nz items give the column indices of each of
159   the 1s within the logical 2D matrix.  Each row's items within
160   the array are the column indices of the 1s within that row
161   of the 2D matrix.  PETSc developers may recognize that this is the
162   same format as that computed by the `DMDASetBlockFills_Private()`
163   function from a dense 2D matrix representation.
164 
  `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns than
  can be represented in the `dfill`, `ofill` format
167 
168   Contributed by Philip C. Roth
169 
170 .seealso: `DM`, `DMDA`, `DMDASetBlockFills()`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
171 @*/
PetscErrorCode DMDASetBlockFillsSparse(DM da, const PetscInt *dfillsparse, const PetscInt *ofillsparse)
{
  DM_DA *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /* save the given dfill and ofill information; the input is already in the
     sparse CSR-like form, so it is simply copied into dd->dfill/dd->ofill
     (a NULL pattern leaves the field unset, meaning a dense block) */
  PetscCall(DMDASetBlockFillsSparse_Private(dfillsparse, dd->w, &dd->dfill));
  PetscCall(DMDASetBlockFillsSparse_Private(ofillsparse, dd->w, &dd->ofill));

  /* count nonzeros in ofill columns (builds dd->ofillcols) */
  PetscCall(DMDASetBlockFills_Private2(dd));
  PetscFunctionReturn(PETSC_SUCCESS);
}
185 
/*
  DMCreateColoring_DA - Creates a coloring of the matrix nonzero structure for a
  DMDA, dispatching to the dimension-specific routines below.  For BAIJ matrix
  types the coloring is computed per block (grid node) rather than per matrix
  row, so the DMDA is temporarily made to look like a 1-dof DMDA and restored
  before returning.
*/
PetscErrorCode DMCreateColoring_DA(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt       dim, m, n, p, nc;
  DMBoundaryType bx, by, bz;
  MPI_Comm       comm;
  PetscMPIInt    size;
  PetscBool      isBAIJ;
  DM_DA         *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
                                  m
          ------------------------------------------------------
         |                                                     |
         |                                                     |
         |               ----------------------                |
         |               |                    |                |
      n  |           yn  |                    |                |
         |               |                    |                |
         |               .---------------------                |
         |             (xs,ys)     xn                          |
         |            .                                        |
         |         (gxs,gys)                                   |
         |                                                     |
          -----------------------------------------------------
  */

  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  PetscCall(DMDAGetInfo(da, &dim, NULL, NULL, NULL, &m, &n, &p, &nc, NULL, &bx, &by, &bz, NULL));

  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (ctype == IS_COLORING_LOCAL) {
    if (size == 1) {
      /* in serial the ghosted (local) coloring coincides with the global one */
      ctype = IS_COLORING_GLOBAL;
    } else {
      /* a single-process-wide periodic direction would make a ghost point and its
         owner coincide on the same rank, which the local coloring cannot express */
      PetscCheck((dim == 1) || !((m == 1 && bx == DM_BOUNDARY_PERIODIC) || (n == 1 && by == DM_BOUNDARY_PERIODIC) || (p == 1 && bz == DM_BOUNDARY_PERIODIC)), PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain on the same process");
    }
  }

  /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
     matrices is for the blocks, not the individual matrix elements  */
  PetscCall(PetscStrbeginswith(da->mattype, MATBAIJ, &isBAIJ));
  if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATMPIBAIJ, &isBAIJ));
  if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATSEQBAIJ, &isBAIJ));
  if (isBAIJ) {
    /* shrink the dof and the x ranges (which are scaled by dof) to per-block values */
    dd->w  = 1;
    dd->xs = dd->xs / nc;
    dd->xe = dd->xe / nc;
    dd->Xs = dd->Xs / nc;
    dd->Xe = dd->Xe / nc;
  }

  /*
     We do not provide a getcoloring function in the DMDA operations because
   the basic DMDA does not know about matrices. We think of DMDA as being
   more low-level than matrices.
  */
  if (dim == 1) PetscCall(DMCreateColoring_DA_1d_MPIAIJ(da, ctype, coloring));
  else if (dim == 2) PetscCall(DMCreateColoring_DA_2d_MPIAIJ(da, ctype, coloring));
  else if (dim == 3) PetscCall(DMCreateColoring_DA_3d_MPIAIJ(da, ctype, coloring));
  else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not done for %" PetscInt_FMT " dimension, send us mail petsc-maint@mcs.anl.gov for code", dim);
  if (isBAIJ) {
    /* restore the dof and ranges altered above */
    dd->w  = nc;
    dd->xs = dd->xs * nc;
    dd->xe = dd->xe * nc;
    dd->Xs = dd->Xs * nc;
    dd->Xe = dd->Xe * nc;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
261 
/*
  DMCreateColoring_DA_2d_MPIAIJ - Colors a 2d DMDA by taking each grid index
  modulo col = 2*s+1 in each direction, with a distinct color per component.
  Computed colorings are cached on the DMDA (dd->localcoloring /
  dd->ghostedcoloring) and handed out by reference.
*/
PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, M, N, dim, s, k, nc, col;
  PetscInt         ncolors = 0;
  MPI_Comm         comm;
  DMBoundaryType   bx, by;
  DMDAStencilType  st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* special case as taught to us by Paul Hovland: a width-1 star stencil
     needs only 5 colors per component instead of 9 */
  if (st == DMDA_STENCIL_STAR && s == 1) {
    PetscCall(DMCreateColoring_DA_2d_5pt_MPIAIJ(da, ctype, coloring));
  } else {
    if (ctype == IS_COLORING_GLOBAL) {
      if (!dd->localcoloring) { /* compute and cache on first use */
        PetscCall(PetscMalloc1(nc * nx * ny, &colors));
        ii = 0;
        for (j = ys; j < ys + ny; j++) {
          for (i = xs; i < xs + nx; i++) {
            for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((i % col) + col * (j % col));
          }
        }
        /* largest color value plus one: nc per (i mod col, j mod col) cell */
        ncolors = nc + nc * (col - 1 + col * (col - 1));
        PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
      }
      *coloring = dd->localcoloring;
    } else if (ctype == IS_COLORING_LOCAL) {
      if (!dd->ghostedcoloring) { /* compute and cache on first use */
        PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
        ii = 0;
        for (j = gys; j < gys + gny; j++) {
          for (i = gxs; i < gxs + gnx; i++) {
            for (k = 0; k < nc; k++) {
              /* the complicated stuff is to handle periodic boundaries */
              colors[ii++] = k + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col));
            }
          }
        }
        ncolors = nc + nc * (col - 1 + col * (col - 1));
        PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
        /* PetscIntView(ncolors,(PetscInt*)colors,0); */

        PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
      }
      *coloring = dd->ghostedcoloring;
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  }
  /* the caller receives (and will eventually destroy) its own reference */
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(PETSC_SUCCESS);
}
325 
/*
  DMCreateColoring_DA_3d_MPIAIJ - Colors a 3d DMDA by taking each grid index
  modulo col = 2*s+1 in each direction, with a distinct color per component.
  Computed colorings are cached on the DMDA and handed out by reference.
*/
PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, gxs, gys, gnx, gny, m, n, p, dim, s, k, nc, col, zs, gzs, ii, l, nz, gnz, M, N, P;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx, by, bz;
  DMDAStencilType  st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;
  /* with periodic boundaries the modulo coloring is only conflict-free when
     the grid size in that direction is a multiple of col */
  PetscCheck(bx != DM_BOUNDARY_PERIODIC || (m % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in X is divisible\n\
                 by 2*stencil_width + 1\n");
  PetscCheck(by != DM_BOUNDARY_PERIODIC || (n % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Y is divisible\n\
                 by 2*stencil_width + 1\n");
  PetscCheck(bz != DM_BOUNDARY_PERIODIC || (p % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Z is divisible\n\
                 by 2*stencil_width + 1\n");

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) { /* compute and cache on first use */
      PetscCall(PetscMalloc1(nc * nx * ny * nz, &colors));
      ii = 0;
      for (k = zs; k < zs + nz; k++) {
        for (j = ys; j < ys + ny; j++) {
          for (i = xs; i < xs + nx; i++) {
            for (l = 0; l < nc; l++) colors[ii++] = l + nc * ((i % col) + col * (j % col) + col * col * (k % col));
          }
        }
      }
      /* largest color value plus one: nc per (i,j,k mod col) cell */
      ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny * nz, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) { /* compute and cache on first use */
      PetscCall(PetscMalloc1(nc * gnx * gny * gnz, &colors));
      ii = 0;
      for (k = gzs; k < gzs + gnz; k++) {
        for (j = gys; j < gys + gny; j++) {
          for (i = gxs; i < gxs + gnx; i++) {
            for (l = 0; l < nc; l++) {
              /* the complicated stuff is to handle periodic boundaries */
              colors[ii++] = l + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col) + col * col * (SetInRange(k, p) % col));
            }
          }
        }
      }
      ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny * gnz, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  /* the caller receives (and will eventually destroy) its own reference */
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(PETSC_SUCCESS);
}
393 
/*
  DMCreateColoring_DA_1d_MPIAIJ - Colors a 1d DMDA by taking the grid index
  modulo col = 2*s+1, with a distinct color per component.  When off-diagonal
  block fill information is available (DMDASetBlockFills(), dd->ofillcols),
  components with no off-diagonal coupling share the base colors 0..nc-1,
  reducing the total color count.  Computed colorings are cached on the DMDA
  and handed out by reference.
*/
PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, nx, i, i1, gxs, gnx, l, m, M, dim, s, nc, col;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, &M, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) { /* compute and cache on first use */
      PetscCall(PetscMalloc1(nc * nx, &colors));
      if (dd->ofillcols) {
        /* tc = number of components with some off-diagonal (neighbor) coupling */
        PetscInt tc = 0;
        for (i = 0; i < nc; i++) tc += (PetscInt)(dd->ofillcols[i] > 0);
        i1 = 0;
        for (i = xs; i < xs + nx; i++) {
          for (l = 0; l < nc; l++) {
            if (dd->ofillcols[l] && (i % col)) {
              /* coupled component at a nonzero stencil offset: give it one of the
                 extra colors, indexed by offset and the component's ofillcols rank */
              colors[i1++] = nc - 1 + tc * ((i % col) - 1) + dd->ofillcols[l];
            } else {
              /* uncoupled component (or offset 0): reuse the base colors 0..nc-1 */
              colors[i1++] = l;
            }
          }
        }
        /* nc base colors plus tc extra colors for each of the 2*s nonzero offsets */
        ncolors = nc + 2 * s * tc;
      } else {
        i1 = 0;
        for (i = xs; i < xs + nx; i++) {
          for (l = 0; l < nc; l++) colors[i1++] = l + nc * (i % col);
        }
        ncolors = nc + nc * (col - 1);
      }
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) { /* compute and cache on first use */
      PetscCall(PetscMalloc1(nc * gnx, &colors));
      i1 = 0;
      for (i = gxs; i < gxs + gnx; i++) {
        for (l = 0; l < nc; l++) {
          /* the complicated stuff is to handle periodic boundaries */
          colors[i1++] = l + nc * (SetInRange(i, m) % col);
        }
      }
      ncolors = nc + nc * (col - 1);
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  /* the caller receives (and will eventually destroy) its own reference */
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(PETSC_SUCCESS);
}
461 
462 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
463 {
464   PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, dim, s, k, nc;
465   PetscInt         ncolors;
466   MPI_Comm         comm;
467   DMBoundaryType   bx, by;
468   ISColoringValue *colors;
469   DM_DA           *dd = (DM_DA *)da->data;
470 
471   PetscFunctionBegin;
472   /*
473          nc - number of components per grid point
474          col - number of colors needed in one direction for single component problem
475   */
476   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, NULL));
477   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
478   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
479   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
480   /* create the coloring */
481   if (ctype == IS_COLORING_GLOBAL) {
482     if (!dd->localcoloring) {
483       PetscCall(PetscMalloc1(nc * nx * ny, &colors));
484       ii = 0;
485       for (j = ys; j < ys + ny; j++) {
486         for (i = xs; i < xs + nx; i++) {
487           for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * j + i) % 5);
488         }
489       }
490       ncolors = 5 * nc;
491       PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
492     }
493     *coloring = dd->localcoloring;
494   } else if (ctype == IS_COLORING_LOCAL) {
495     if (!dd->ghostedcoloring) {
496       PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
497       ii = 0;
498       for (j = gys; j < gys + gny; j++) {
499         for (i = gxs; i < gxs + gnx; i++) {
500           for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * SetInRange(j, n) + SetInRange(i, m)) % 5);
501         }
502       }
503       ncolors = 5 * nc;
504       PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
505       PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
506     }
507     *coloring = dd->ghostedcoloring;
508   } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
509   PetscFunctionReturn(PETSC_SUCCESS);
510 }
511 
512 /* =========================================================================== */
513 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM, Mat);
514 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM, Mat);
515 extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM, Mat);
516 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM, Mat);
517 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM, Mat);
518 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM, Mat);
519 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM, Mat);
520 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM, Mat);
521 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM, Mat);
522 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM, Mat);
523 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM, Mat);
524 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM, Mat);
525 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM, Mat);
526 extern PetscErrorCode DMCreateMatrix_DA_IS(DM, Mat);
527 
528 /*@C
529   MatSetupDM - Sets the `DMDA` that is to be used by the HYPRE_StructMatrix PETSc matrix
530 
531   Logically Collective
532 
533   Input Parameters:
534 + mat - the matrix
535 - da  - the da
536 
537   Level: intermediate
538 
539 .seealso: `DMDA`, `Mat`, `MatSetUp()`
540 @*/
PetscErrorCode MatSetupDM(Mat mat, DM da)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscValidHeaderSpecificType(da, DM_CLASSID, 2, DMDA);
  /* forward to the matrix-type-specific implementation if one is registered;
     PetscTryMethod is a no-op when the type provides none */
  PetscTryMethod(mat, "MatSetupDM_C", (Mat, DM), (mat, da));
  PetscFunctionReturn(PETSC_SUCCESS);
}
549 
/*
  MatView_MPI_DA - Views a matrix created from a DMDA in the natural grid
  ordering rather than the parallel PETSc ordering, by permuting it through
  the DMDA's application ordering (AO) before viewing.
*/
PetscErrorCode MatView_MPI_DA(Mat A, PetscViewer viewer)
{
  DM                da;
  const char       *prefix;
  Mat               AA, Anatural;
  AO                ao;
  PetscInt          rstart, rend, *petsc, i;
  IS                is;
  MPI_Comm          comm;
  PetscViewerFormat format;
  PetscBool         flag;

  PetscFunctionBegin;
  /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
  PetscCall(PetscViewerGetFormat(viewer, &format));
  if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(MatGetDM(A, &da));
  PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");

  /* map the locally owned row range back through the AO to build the
     natural-ordering index set (ownership of petsc[] passes to the IS) */
  PetscCall(DMDAGetAO(da, &ao));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
  PetscCall(PetscMalloc1(rend - rstart, &petsc));
  for (i = rstart; i < rend; i++) petsc[i - rstart] = i;
  PetscCall(AOApplicationToPetsc(ao, rend - rstart, petsc));
  PetscCall(ISCreateGeneral(comm, rend - rstart, petsc, PETSC_OWN_POINTER, &is));

  /* call viewer on natural ordering */
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPISELL, &flag));
  if (flag) {
    /* for MATMPISELL work on an AIJ copy; destroyed at the end */
    PetscCall(MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &AA));
    A = AA;
  }
  PetscCall(MatCreateSubMatrix(A, is, is, MAT_INITIAL_MATRIX, &Anatural));
  PetscCall(ISDestroy(&is));
  PetscCall(PetscObjectGetOptionsPrefix((PetscObject)A, &prefix));
  PetscCall(PetscObjectSetOptionsPrefix((PetscObject)Anatural, prefix));
  PetscCall(PetscObjectSetName((PetscObject)Anatural, ((PetscObject)A)->name));
  /* suppress the class/prefix banner so the output reads as the original
     matrix rather than the temporary permuted copy */
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
  PetscCall(MatView(Anatural, viewer));
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
  PetscCall(MatDestroy(&Anatural));
  if (flag) PetscCall(MatDestroy(&AA));
  PetscFunctionReturn(PETSC_SUCCESS);
}
596 
/*
  MatLoad_MPI_DA - Loads a matrix that was stored in the natural grid ordering
  and permutes it into the DMDA's parallel PETSc ordering, replacing the
  contents of A in place.
*/
PetscErrorCode MatLoad_MPI_DA(Mat A, PetscViewer viewer)
{
  DM       da;
  Mat      Anatural, Aapp;
  AO       ao;
  PetscInt rstart, rend, *app, i, m, n, M, N;
  IS       is;
  MPI_Comm comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(MatGetDM(A, &da));
  PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");

  /* Load the matrix in natural ordering, mirroring A's type and layout */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Anatural));
  PetscCall(MatSetType(Anatural, ((PetscObject)A)->type_name));
  PetscCall(MatGetSize(A, &M, &N));
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(MatSetSizes(Anatural, m, n, M, N));
  PetscCall(MatLoad(Anatural, viewer));

  /* Map natural ordering to application ordering and create IS
     (ownership of app[] passes to the IS) */
  PetscCall(DMDAGetAO(da, &ao));
  PetscCall(MatGetOwnershipRange(Anatural, &rstart, &rend));
  PetscCall(PetscMalloc1(rend - rstart, &app));
  for (i = rstart; i < rend; i++) app[i - rstart] = i;
  PetscCall(AOPetscToApplication(ao, rend - rstart, app));
  PetscCall(ISCreateGeneral(comm, rend - rstart, app, PETSC_OWN_POINTER, &is));

  /* Do permutation and replace header; MatHeaderReplace makes A take over
     Aapp's data and destroys the temporary */
  PetscCall(MatCreateSubMatrix(Anatural, is, is, MAT_INITIAL_MATRIX, &Aapp));
  PetscCall(MatHeaderReplace(A, &Aapp));
  PetscCall(ISDestroy(&is));
  PetscCall(MatDestroy(&Anatural));
  PetscFunctionReturn(PETSC_SUCCESS);
}
634 
635 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
636 {
637   PetscInt    dim, dof, nx, ny, nz, dims[3], starts[3], M, N, P;
638   Mat         A;
639   MPI_Comm    comm;
640   MatType     Atype;
641   MatType     mtype;
642   PetscMPIInt size;
643   DM_DA      *dd    = (DM_DA *)da->data;
644   void (*aij)(void) = NULL, (*baij)(void) = NULL, (*sbaij)(void) = NULL, (*sell)(void) = NULL, (*is)(void) = NULL;
645 
646   PetscFunctionBegin;
647   PetscCall(MatInitializePackage());
648   mtype = da->mattype;
649 
650   /*
651                                   m
652           ------------------------------------------------------
653          |                                                     |
654          |                                                     |
655          |               ----------------------                |
656          |               |                    |                |
657       n  |           ny  |                    |                |
658          |               |                    |                |
659          |               .---------------------                |
660          |             (xs,ys)     nx                          |
661          |            .                                        |
662          |         (gxs,gys)                                   |
663          |                                                     |
664           -----------------------------------------------------
665   */
666 
667   /*
668          nc - number of components per grid point
669          col - number of colors needed in one direction for single component problem
670   */
671   M   = dd->M;
672   N   = dd->N;
673   P   = dd->P;
674   dim = da->dim;
675   dof = dd->w;
676   /* PetscCall(DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); */
677   PetscCall(DMDAGetCorners(da, NULL, NULL, NULL, &nx, &ny, &nz));
678   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
679   PetscCall(MatCreate(comm, &A));
680   PetscCall(MatSetSizes(A, dof * nx * ny * nz, dof * nx * ny * nz, dof * M * N * P, dof * M * N * P));
681   PetscCall(MatSetType(A, mtype));
682   PetscCall(MatSetFromOptions(A));
683   if (dof * nx * ny * nz < da->bind_below) {
684     PetscCall(MatSetBindingPropagates(A, PETSC_TRUE));
685     PetscCall(MatBindToCPU(A, PETSC_TRUE));
686   }
687   PetscCall(MatSetDM(A, da));
688   if (da->structure_only) PetscCall(MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE));
689   PetscCall(MatGetType(A, &Atype));
690   /*
691      We do not provide a getmatrix function in the DMDA operations because
692    the basic DMDA does not know about matrices. We think of DMDA as being more
693    more low-level than matrices. This is kind of cheating but, cause sometimes
694    we think of DMDA has higher level than matrices.
695 
696      We could switch based on Atype (or mtype), but we do not since the
697    specialized setting routines depend only on the particular preallocation
698    details of the matrix, not the type itself.
699   */
700   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJSetPreallocation_C", &aij));
701   if (!aij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJSetPreallocation_C", &aij));
702   if (!aij) {
703     PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIBAIJSetPreallocation_C", &baij));
704     if (!baij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", &baij));
705     if (!baij) {
706       PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISBAIJSetPreallocation_C", &sbaij));
707       if (!sbaij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSBAIJSetPreallocation_C", &sbaij));
708       if (!sbaij) {
709         PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", &sell));
710         if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSELLSetPreallocation_C", &sell));
711       }
712       if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatISSetPreallocation_C", &is));
713     }
714   }
715   if (aij) {
716     if (dim == 1) {
717       if (dd->ofill) {
718         PetscCall(DMCreateMatrix_DA_1d_MPIAIJ_Fill(da, A));
719       } else {
720         DMBoundaryType bx;
721         PetscMPIInt    size;
722         PetscCall(DMDAGetInfo(da, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &bx, NULL, NULL, NULL));
723         PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));
724         if (size == 1 && bx == DM_BOUNDARY_NONE) {
725           PetscCall(DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da, A));
726         } else {
727           PetscCall(DMCreateMatrix_DA_1d_MPIAIJ(da, A));
728         }
729       }
730     } else if (dim == 2) {
731       if (dd->ofill) {
732         PetscCall(DMCreateMatrix_DA_2d_MPIAIJ_Fill(da, A));
733       } else {
734         PetscCall(DMCreateMatrix_DA_2d_MPIAIJ(da, A));
735       }
736     } else if (dim == 3) {
737       if (dd->ofill) {
738         PetscCall(DMCreateMatrix_DA_3d_MPIAIJ_Fill(da, A));
739       } else {
740         PetscCall(DMCreateMatrix_DA_3d_MPIAIJ(da, A));
741       }
742     }
743   } else if (baij) {
744     if (dim == 2) {
745       PetscCall(DMCreateMatrix_DA_2d_MPIBAIJ(da, A));
746     } else if (dim == 3) {
747       PetscCall(DMCreateMatrix_DA_3d_MPIBAIJ(da, A));
748     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
749   } else if (sbaij) {
750     if (dim == 2) {
751       PetscCall(DMCreateMatrix_DA_2d_MPISBAIJ(da, A));
752     } else if (dim == 3) {
753       PetscCall(DMCreateMatrix_DA_3d_MPISBAIJ(da, A));
754     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
755   } else if (sell) {
756     if (dim == 2) {
757       PetscCall(DMCreateMatrix_DA_2d_MPISELL(da, A));
758     } else if (dim == 3) {
759       PetscCall(DMCreateMatrix_DA_3d_MPISELL(da, A));
760     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
761   } else if (is) {
762     PetscCall(DMCreateMatrix_DA_IS(da, A));
763   } else {
764     ISLocalToGlobalMapping ltog;
765 
766     PetscCall(MatSetBlockSize(A, dof));
767     PetscCall(MatSetUp(A));
768     PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
769     PetscCall(MatSetLocalToGlobalMapping(A, ltog, ltog));
770   }
771   PetscCall(DMDAGetGhostCorners(da, &starts[0], &starts[1], &starts[2], &dims[0], &dims[1], &dims[2]));
772   PetscCall(MatSetStencil(A, dim, dims, starts, dof));
773   PetscCall(MatSetDM(A, da));
774   PetscCallMPI(MPI_Comm_size(comm, &size));
775   if (size > 1) {
776     /* change viewer to display matrix in natural ordering */
777     PetscCall(MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA));
778     PetscCall(MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA));
779   }
780   *J = A;
781   PetscFunctionReturn(PETSC_SUCCESS);
782 }
783 
784 PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]);
785 
786 PetscErrorCode DMCreateMatrix_DA_IS(DM dm, Mat J)
787 {
788   DM_DA                 *da = (DM_DA *)dm->data;
789   Mat                    lJ, P;
790   ISLocalToGlobalMapping ltog;
791   IS                     is;
792   PetscBT                bt;
793   const PetscInt        *e_loc, *idx;
794   PetscInt               i, nel, nen, nv, dof, *gidx, n, N;
795 
796   /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
797      We need to filter out the local indices that are not represented through the DMDAGetElements decomposition */
798   PetscFunctionBegin;
799   dof = da->w;
800   PetscCall(MatSetBlockSize(J, dof));
801   PetscCall(DMGetLocalToGlobalMapping(dm, &ltog));
802 
803   /* flag local elements indices in local DMDA numbering */
804   PetscCall(ISLocalToGlobalMappingGetSize(ltog, &nv));
805   PetscCall(PetscBTCreate(nv / dof, &bt));
806   PetscCall(DMDAGetElements(dm, &nel, &nen, &e_loc)); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
807   for (i = 0; i < nel * nen; i++) PetscCall(PetscBTSet(bt, e_loc[i]));
808 
809   /* filter out (set to -1) the global indices not used by the local elements */
810   PetscCall(PetscMalloc1(nv / dof, &gidx));
811   PetscCall(ISLocalToGlobalMappingGetBlockIndices(ltog, &idx));
812   PetscCall(PetscArraycpy(gidx, idx, nv / dof));
813   PetscCall(ISLocalToGlobalMappingRestoreBlockIndices(ltog, &idx));
814   for (i = 0; i < nv / dof; i++)
815     if (!PetscBTLookup(bt, i)) gidx[i] = -1;
816   PetscCall(PetscBTDestroy(&bt));
817   PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)dm), dof, nv / dof, gidx, PETSC_OWN_POINTER, &is));
818   PetscCall(ISLocalToGlobalMappingCreateIS(is, &ltog));
819   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
820   PetscCall(ISLocalToGlobalMappingDestroy(&ltog));
821   PetscCall(ISDestroy(&is));
822 
823   /* Preallocation */
824   if (dm->prealloc_skip) {
825     PetscCall(MatSetUp(J));
826   } else {
827     PetscCall(MatISGetLocalMat(J, &lJ));
828     PetscCall(MatGetLocalToGlobalMapping(lJ, &ltog, NULL));
829     PetscCall(MatCreate(PetscObjectComm((PetscObject)lJ), &P));
830     PetscCall(MatSetType(P, MATPREALLOCATOR));
831     PetscCall(MatSetLocalToGlobalMapping(P, ltog, ltog));
832     PetscCall(MatGetSize(lJ, &N, NULL));
833     PetscCall(MatGetLocalSize(lJ, &n, NULL));
834     PetscCall(MatSetSizes(P, n, n, N, N));
835     PetscCall(MatSetBlockSize(P, dof));
836     PetscCall(MatSetUp(P));
837     for (i = 0; i < nel; i++) PetscCall(MatSetValuesBlockedLocal(P, nen, e_loc + i * nen, nen, e_loc + i * nen, NULL, INSERT_VALUES));
838     PetscCall(MatPreallocatorPreallocate(P, (PetscBool)!da->prealloc_only, lJ));
839     PetscCall(MatISRestoreLocalMat(J, &lJ));
840     PetscCall(DMDARestoreElements(dm, &nel, &nen, &e_loc));
841     PetscCall(MatDestroy(&P));
842 
843     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
844     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
845   }
846   PetscFunctionReturn(PETSC_SUCCESS);
847 }
848 
/*
   Preallocates a 2d DMDA matrix in SELL format (Seq and MPI variants) and, unless
   da->prealloc_only is set, inserts explicit zeros into every potential nonzero
   location so later insertions cannot allocate new nonzeros
   (MAT_NEW_NONZERO_LOCATION_ERR is set at the end).
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p;
  PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1; /* stencil width in one direction */
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* rows: the nc row indices of one grid point; cols: up to col*col*nc stencil column
     indices, accumulated over all nc components of that grid point */
  PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* x-extent of the stencil: full [-s,s] for periodic, clipped at the physical boundary otherwise */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      slot = i - gxs + gnx * (j - gys); /* local (ghosted) index of grid point (i,j) */

      /* y-extent of the stencil, clipped like x above */
      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      cnt = 0;
      for (k = 0; k < nc; k++) {
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc * (slot + gnx * l + p);
            }
          }
        }
        rows[k] = k + nc * (slot);
      }
      PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  /* only the call matching J's actual type takes effect; the other is a no-op */
  PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
  PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* second pass repeats the same stencil traversal, inserting explicit zeros */
    PetscCall(PetscCalloc1(col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        cnt = 0;
        for (k = 0; k < nc; k++) {
          for (l = lstart; l < lend + 1; l++) {
            for (p = pstart; p < pend + 1; p++) {
              if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
                cols[cnt++] = k + nc * (slot + gnx * l + p);
              }
            }
          }
          rows[k] = k + nc * (slot);
        }
        PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
949 
/*
   Preallocates a 3d DMDA matrix in SELL format (Seq and MPI variants) and, unless
   da->prealloc_only is set, inserts explicit zeros into every potential nonzero
   location (MAT_NEW_NONZERO_LOCATION_ERR is set at the end). 3d analogue of
   DMCreateMatrix_DA_2d_MPISELL().
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1; /* stencil width in one direction */
  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* rows: the nc row indices of one grid point; cols: up to col^3*nc stencil column
     indices, accumulated over all nc components of that grid point */
  PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* stencil extent per direction: full [-s,s] for periodic, clipped at the boundary otherwise */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); /* local (ghosted) index of grid point (i,j,k) */

        cnt = 0;
        for (l = 0; l < nc; l++) {
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                }
              }
            }
          }
          rows[l] = l + nc * (slot);
        }
        PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  /* only the call matching J's actual type takes effect; the other is a no-op */
  PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
  PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* NOTE(review): nc*nc would suffice here (nc rows x at most col^3*nc columns), matching the
       2d variant's col*col*nc*nc; the extra factor of nc only over-allocates — confirm before changing */
    PetscCall(PetscCalloc1(col * col * col * nc * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          for (l = 0; l < nc; l++) {
            for (ii = istart; ii < iend + 1; ii++) {
              for (jj = jstart; jj < jend + 1; jj++) {
                for (kk = kstart; kk < kend + 1; kk++) {
                  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                    cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  }
                }
              }
            }
            rows[l] = l + nc * (slot);
          }
          PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1060 
/*
   Preallocates a 2d DMDA matrix in AIJ format (Seq and MPI variants) and, unless
   da->prealloc_only is set, inserts explicit zeros into every potential nonzero
   location (MAT_NEW_NONZERO_LOCATION_ERR is set at the end).
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, M, N;
  PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
  MPI_Comm               comm;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog, mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE, alreadyboundtocpu = PETSC_TRUE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  /* with no periodicity the second insertion pass generates columns in ascending order,
     so full-sorted insertion can be used; the option is turned off again after assembly */
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
  col = 2 * s + 1;
  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* rows: the nc row indices of one grid point; cols: up to col*col*nc stencil column indices */
  PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* x-extent of the stencil: full [-s,s] for periodic, clipped at the boundary otherwise */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      slot = i - gxs + gnx * (j - gys); /* local (ghosted) index of grid point (i,j) */

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      cnt = 0;
      for (k = 0; k < nc; k++) {
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc * (slot + gnx * l + p);
            }
          }
        }
        rows[k] = k + nc * (slot);
      }
      if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  /* only the call matching J's actual type takes effect; the other is a no-op */
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  /* do not overwrite a local-to-global mapping the matrix already carries */
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        cnt = 0;
        /* unlike the counting pass, components are innermost here so the nc column
           indices of each stencil point are consecutive */
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = nc * (slot + gnx * l + p);
              for (k = 1; k < nc; k++) {
                cols[cnt] = 1 + cols[cnt - 1];
                cnt++;
              }
            }
          }
        }
        for (k = 0; k < nc; k++) rows[k] = k + nc * (slot);
        PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBoundToCPU(J, &alreadyboundtocpu));
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    /* unbind only if the matrix was not already bound to the CPU before we bound it */
    if (!alreadyboundtocpu) PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1171 
/*
   Preallocates a 2d DMDA matrix in AIJ format when the user has supplied block fill
   patterns via DMDASetBlockFills(): dd->dfill restricts the component coupling at the
   diagonal (stencil center) point, dd->ofill at the off-diagonal stencil points. Both
   are in the modified CSR form built by DMDASetBlockFills_Private(): entries 0..nc are
   row starts and the remaining entries are the coupled column components.
   Unless da->prealloc_only is set, explicit zeros are inserted into every potential
   nonzero location (MAT_NEW_NONZERO_LOCATION_ERR is set at the end).
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, M, N;
  PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
  DM_DA                 *dd = (DM_DA *)da->data;
  PetscInt               ifill_col, *ofill = dd->ofill, *dfill = dd->dfill;
  MPI_Comm               comm;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;
  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* one row is processed at a time, so at most col*col stencil points x nc components */
  PetscCall(PetscMalloc1(col * col * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* x-extent of the stencil: full [-s,s] for periodic, clipped at the boundary otherwise */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      slot = i - gxs + gnx * (j - gys); /* local (ghosted) index of grid point (i,j) */

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      /* one matrix row (grid point + component k) at a time */
      for (k = 0; k < nc; k++) {
        cnt = 0;
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if (l || p) {
              /* off-diagonal stencil point: couple only the components listed in ofill row k */
              if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
                for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
              }
            } else {
              /* stencil center: couple components from dfill row k, or all nc if no dfill given */
              if (dfill) {
                for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
              } else {
                for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
              }
            }
          }
        }
        row    = k + nc * (slot);
        maxcnt = PetscMax(maxcnt, cnt);
        if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
        else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  /* only the call matching J's actual type takes effect; the other is a no-op */
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* second pass repeats the same traversal, inserting explicit zeros (NULL values) */
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        for (k = 0; k < nc; k++) {
          cnt = 0;
          for (l = lstart; l < lend + 1; l++) {
            for (p = pstart; p < pend + 1; p++) {
              if (l || p) {
                if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
                  for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
                }
              } else {
                if (dfill) {
                  for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
                } else {
                  for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
                }
              }
            }
          }
          row = k + nc * (slot);
          PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        }
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1295 
1296 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da, Mat J)
1297 {
1298   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1299   PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
1300   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
1301   MPI_Comm               comm;
1302   DMBoundaryType         bx, by, bz;
1303   ISLocalToGlobalMapping ltog, mltog;
1304   DMDAStencilType        st;
1305   PetscBool              removedups = PETSC_FALSE;
1306 
1307   PetscFunctionBegin;
1308   /*
1309          nc - number of components per grid point
1310          col - number of colors needed in one direction for single component problem
1311   */
1312   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
1313   if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1314   col = 2 * s + 1;
1315 
1316   /*
1317        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1318        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1319   */
1320   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1321   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1322   if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;
1323 
1324   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
1325   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
1326   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1327 
1328   PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
1329   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1330 
1331   PetscCall(MatSetBlockSize(J, nc));
1332   /* determine the matrix preallocation information */
1333   MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
1334   for (i = xs; i < xs + nx; i++) {
1335     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1336     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1337     for (j = ys; j < ys + ny; j++) {
1338       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1339       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1340       for (k = zs; k < zs + nz; k++) {
1341         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1342         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1343 
1344         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1345 
1346         cnt = 0;
1347         for (l = 0; l < nc; l++) {
1348           for (ii = istart; ii < iend + 1; ii++) {
1349             for (jj = jstart; jj < jend + 1; jj++) {
1350               for (kk = kstart; kk < kend + 1; kk++) {
1351                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1352                   cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
1353                 }
1354               }
1355             }
1356           }
1357           rows[l] = l + nc * (slot);
1358         }
1359         if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1360         else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1361       }
1362     }
1363   }
1364   PetscCall(MatSetBlockSize(J, nc));
1365   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
1366   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
1367   MatPreallocateEnd(dnz, onz);
1368   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1369   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1370 
1371   /*
1372     For each node in the grid: we get the neighbors in the local (on processor ordering
1373     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1374     PETSc ordering.
1375   */
1376   if (!da->prealloc_only) {
1377     for (i = xs; i < xs + nx; i++) {
1378       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1379       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1380       for (j = ys; j < ys + ny; j++) {
1381         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1382         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1383         for (k = zs; k < zs + nz; k++) {
1384           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1385           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1386 
1387           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1388 
1389           cnt = 0;
1390           for (kk = kstart; kk < kend + 1; kk++) {
1391             for (jj = jstart; jj < jend + 1; jj++) {
1392               for (ii = istart; ii < iend + 1; ii++) {
1393                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1394                   cols[cnt++] = nc * (slot + ii + gnx * jj + gnx * gny * kk);
1395                   for (l = 1; l < nc; l++) {
1396                     cols[cnt] = 1 + cols[cnt - 1];
1397                     cnt++;
1398                   }
1399                 }
1400               }
1401             }
1402           }
1403           rows[0] = nc * (slot);
1404           for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1405           PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1406         }
1407       }
1408     }
1409     /* do not copy values to GPU since they are all zero and not yet needed there */
1410     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1411     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1412     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1413     if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1414     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1415     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1416   }
1417   PetscCall(PetscFree2(rows, cols));
1418   PetscFunctionReturn(PETSC_SUCCESS);
1419 }
1420 
/*
  DMCreateMatrix_DA_1d_MPIAIJ_Fill - Preallocates (and, unless da->prealloc_only is set, zero-fills)
  an AIJ matrix for a 1d DMDA, honoring the user-provided fill patterns dd->dfill (coupling between
  components at the same grid point) and dd->ofill (coupling with components at neighboring points).

  Both dfill and ofill are in the modified CSR layout produced by DMDASetBlockFills_Private():
  entries [0..nc] are row pointers, so fill[j+1]-fill[j] is the number of couplings for component j,
  and fill[k] for k in [fill[j], fill[j+1]) are the coupled component indices.

  Rows/columns are inserted with MatSetValues() using GLOBAL indices, so periodic wrap-around
  columns can be addressed directly.  NOTE(review): the index arithmetic assumes each process owns
  at least s grid points - confirm against the DMDA decomposition guarantees.
*/
PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da, Mat J)
{
  DM_DA                 *dd = (DM_DA *)da->data;
  PetscInt               xs, nx, i, j, gxs, gnx, row, k, l;
  PetscInt               m, dim, s, *cols = NULL, nc, cnt, maxcnt = 0, *ocols;
  PetscInt              *ofill = dd->ofill, *dfill = dd->dfill;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog;
  PetscMPIInt            rank, size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)da), &rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));

  /*
         nc - number of components per grid point
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  PetscCheck(s <= 1, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Matrix creation for 1d not implemented correctly for stencil width larger than 1");
  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

  PetscCall(MatSetBlockSize(J, nc));
  /* here cols[]/ocols[] are per-row NONZERO COUNTS (diagonal / off-diagonal block), not indices */
  PetscCall(PetscCalloc2(nx * nc, &cols, nx * nc, &ocols));

  /*
        note should be smaller for first and last process with no periodic
        does not handle dfill
  */
  cnt = 0;
  /* coupling with process to the left: first s grid points may reach off-process */
  for (i = 0; i < s; i++) {
    for (j = 0; j < nc; j++) {
      /* ofill[j+1]-ofill[j] = number of off-point couplings for component j */
      ocols[cnt] = ((rank == 0) ? 0 : (s - i) * (ofill[j + 1] - ofill[j]));
      cols[cnt]  = dfill[j + 1] - dfill[j] + (s + i) * (ofill[j + 1] - ofill[j]);
      if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        /* wrap-around neighbor is off-process unless one process owns the whole line */
        if (size > 1) ocols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
        else cols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
      }
      maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
      cnt++;
    }
  }
  /* interior grid points: the entire stencil is owned locally */
  for (i = s; i < nx - s; i++) {
    for (j = 0; j < nc; j++) {
      cols[cnt] = dfill[j + 1] - dfill[j] + 2 * s * (ofill[j + 1] - ofill[j]);
      maxcnt    = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
      cnt++;
    }
  }
  /* coupling with process to the right: last s grid points may reach off-process */
  for (i = nx - s; i < nx; i++) {
    for (j = 0; j < nc; j++) {
      ocols[cnt] = ((rank == (size - 1)) ? 0 : (i - nx + s + 1) * (ofill[j + 1] - ofill[j]));
      cols[cnt]  = dfill[j + 1] - dfill[j] + (s + nx - i - 1) * (ofill[j + 1] - ofill[j]);
      if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        if (size > 1) ocols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
        else cols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
      }
      maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
      cnt++;
    }
  }

  PetscCall(MatSeqAIJSetPreallocation(J, 0, cols));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, cols, 0, ocols));
  PetscCall(PetscFree2(cols, ocols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* cols[] is now reused to hold GLOBAL column indices; maxcnt bounds the widest row */
    PetscCall(PetscMalloc1(maxcnt, &cols));
    row = xs * nc; /* global index of the first locally owned row */
    /* coupling with process to the left */
    for (i = xs; i < xs + s; i++) {
      for (j = 0; j < nc; j++) {
        cnt = 0;
        if (rank) {
          /* neighbors to the left, owned by the previous process */
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
          }
        }
        if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
          /* periodic wrap: columns at the far (right) end of the domain */
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (m + i - s - l) * nc + ofill[k];
          }
        }
        if (dfill) {
          /* same-point coupling restricted to the dfill pattern */
          for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
        } else {
          /* no dfill given: couple to all components at this point */
          for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
        }
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
        }
        PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        row++;
      }
    }
    /* interior rows: left neighbors, same point, right neighbors */
    for (i = xs + s; i < xs + nx - s; i++) {
      for (j = 0; j < nc; j++) {
        cnt = 0;
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
        }
        if (dfill) {
          for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
        } else {
          for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
        }
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
        }
        PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        row++;
      }
    }
    /* coupling with process to the right */
    for (i = xs + nx - s; i < xs + nx; i++) {
      for (j = 0; j < nc; j++) {
        cnt = 0;
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
        }
        if (dfill) {
          for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
        } else {
          for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
        }
        if (rank < size - 1) {
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
          }
        }
        if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
          /* periodic wrap: columns at the start of the domain (e.g. i=m-1, s=1, l=0 -> column point 0) */
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s - l - m + 2) * nc + ofill[k];
          }
        }
        PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        row++;
      }
    }
    PetscCall(PetscFree(cols));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1580 
1581 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da, Mat J)
1582 {
1583   PetscInt               xs, nx, i, i1, slot, gxs, gnx;
1584   PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1585   PetscInt               istart, iend;
1586   DMBoundaryType         bx;
1587   ISLocalToGlobalMapping ltog, mltog;
1588 
1589   PetscFunctionBegin;
1590   /*
1591          nc - number of components per grid point
1592          col - number of colors needed in one direction for single component problem
1593   */
1594   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1595   if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1596   col = 2 * s + 1;
1597 
1598   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1599   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1600 
1601   PetscCall(MatSetBlockSize(J, nc));
1602   PetscCall(MatSeqAIJSetPreallocation(J, col * nc, NULL));
1603   PetscCall(MatMPIAIJSetPreallocation(J, col * nc, NULL, col * nc, NULL));
1604 
1605   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1606   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1607   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1608 
1609   /*
1610     For each node in the grid: we get the neighbors in the local (on processor ordering
1611     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1612     PETSc ordering.
1613   */
1614   if (!da->prealloc_only) {
1615     PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
1616     for (i = xs; i < xs + nx; i++) {
1617       istart = PetscMax(-s, gxs - i);
1618       iend   = PetscMin(s, gxs + gnx - i - 1);
1619       slot   = i - gxs;
1620 
1621       cnt = 0;
1622       for (i1 = istart; i1 < iend + 1; i1++) {
1623         cols[cnt++] = nc * (slot + i1);
1624         for (l = 1; l < nc; l++) {
1625           cols[cnt] = 1 + cols[cnt - 1];
1626           cnt++;
1627         }
1628       }
1629       rows[0] = nc * (slot);
1630       for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1631       PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1632     }
1633     /* do not copy values to GPU since they are all zero and not yet needed there */
1634     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1635     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1636     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1637     if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1638     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1639     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1640     PetscCall(PetscFree2(rows, cols));
1641   }
1642   PetscFunctionReturn(PETSC_SUCCESS);
1643 }
1644 
1645 PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da, Mat J)
1646 {
1647   PetscInt               xs, nx, i, i1, slot, gxs, gnx;
1648   PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1649   PetscInt               istart, iend;
1650   DMBoundaryType         bx;
1651   ISLocalToGlobalMapping ltog, mltog;
1652 
1653   PetscFunctionBegin;
1654   /*
1655          nc - number of components per grid point
1656          col - number of colors needed in one direction for single component problem
1657   */
1658   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1659   col = 2 * s + 1;
1660 
1661   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1662   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1663 
1664   PetscCall(MatSetBlockSize(J, nc));
1665   PetscCall(MatSeqAIJSetTotalPreallocation(J, nx * nc * col * nc));
1666 
1667   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1668   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1669   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1670 
1671   /*
1672     For each node in the grid: we get the neighbors in the local (on processor ordering
1673     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1674     PETSc ordering.
1675   */
1676   if (!da->prealloc_only) {
1677     PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
1678     for (i = xs; i < xs + nx; i++) {
1679       istart = PetscMax(-s, gxs - i);
1680       iend   = PetscMin(s, gxs + gnx - i - 1);
1681       slot   = i - gxs;
1682 
1683       cnt = 0;
1684       for (i1 = istart; i1 < iend + 1; i1++) {
1685         cols[cnt++] = nc * (slot + i1);
1686         for (l = 1; l < nc; l++) {
1687           cols[cnt] = 1 + cols[cnt - 1];
1688           cnt++;
1689         }
1690       }
1691       rows[0] = nc * (slot);
1692       for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1693       PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1694     }
1695     /* do not copy values to GPU since they are all zero and not yet needed there */
1696     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1697     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1698     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1699     if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1700     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1701     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1702     PetscCall(PetscFree2(rows, cols));
1703   }
1704   PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1705   PetscFunctionReturn(PETSC_SUCCESS);
1706 }
1707 
/*
  DMCreateMatrix_DA_2d_MPIBAIJ - Preallocates a block AIJ (BAIJ) matrix for a 2d DMDA with one
  block row per grid point (block size nc) and, unless da->prealloc_only is set, inserts a zero
  block for every stencil entry so the nonzero pattern is locked in.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, ii, jj;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* scratch for block column indices; only col*col entries are needed per block row, so this is generous */
  PetscCall(PetscMalloc1(col * col * nc * nc, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* stencil extent in x: full width if periodic, otherwise clipped at the physical boundary */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      /* local (ghosted) linear index of grid point (i,j) */
      slot   = i - gxs + gnx * (j - gys);

      /* Find block columns in block row */
      cnt = 0;
      for (ii = istart; ii < iend + 1; ii++) {
        for (jj = jstart; jj < jend + 1; jj++) {
          if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
            cols[cnt++] = slot + ii + gnx * jj;
          }
        }
      }
      PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* zero buffer reused for every insertion; sized for the widest possible row of nc x nc blocks */
    PetscCall(PetscCalloc1(col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        slot   = i - gxs + gnx * (j - gys);
        cnt    = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
              cols[cnt++] = slot + ii + gnx * jj;
            }
          }
        }
        PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1799 
/*
  DMCreateMatrix_DA_3d_MPIBAIJ - Preallocates a block AIJ (BAIJ) matrix for a 3d DMDA with one
  block row per grid point (block size nc) and, unless da->prealloc_only is set, inserts a zero
  block for every stencil entry so the nonzero pattern is locked in.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* cols[] holds BLOCK column indices, so col^3 entries suffice (nc plays no role here) */
  PetscCall(PetscMalloc1(col * col * col, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* stencil extent per axis: full width if periodic, otherwise clipped at the physical boundary */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        /* local (ghosted) linear index of grid point (i,j,k) */
        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

        /* Find block columns in block row */
        cnt = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            for (kk = kstart; kk < kend + 1; kk++) {
              if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
              }
            }
          }
        }
        PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* zero buffer reused for every insertion; sized for the widest possible row of nc x nc blocks */
    PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
                }
              }
            }
          }
          PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1906 
1907 /*
1908   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
1909   identify in the local ordering with periodic domain.
1910 */
1911 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog, PetscInt *row, PetscInt *cnt, PetscInt col[])
1912 {
1913   PetscInt i, n;
1914 
1915   PetscFunctionBegin;
1916   PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, 1, row, row));
1917   PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, *cnt, col, col));
1918   for (i = 0, n = 0; i < *cnt; i++) {
1919     if (col[i] >= *row) col[n++] = col[i];
1920   }
1921   *cnt = n;
1922   PetscFunctionReturn(PETSC_SUCCESS);
1923 }
1924 
/*
  DMCreateMatrix_DA_2d_MPISBAIJ - Preallocates a symmetric block AIJ (SBAIJ) matrix for a 2d DMDA
  (one block row per grid point, block size nc); only the upper-triangular part of each block row
  is counted/inserted, using L2GFilterUpperTriangular() to drop lower-triangular entries after
  mapping to the global ordering.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, ii, jj;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc1(col * col * nc * nc, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* stencil extent in x: full width if periodic, otherwise clipped at the physical boundary */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      /* local (ghosted) linear index of grid point (i,j) */
      slot   = i - gxs + gnx * (j - gys);

      /* Find block columns in block row */
      cnt = 0;
      for (ii = istart; ii < iend + 1; ii++) {
        for (jj = jstart; jj < jend + 1; jj++) {
          if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
        }
      }
      /* after this call slot and cols[] are GLOBAL block indices, restricted to the upper triangle */
      PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
      PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* zero buffer reused for every insertion; sized for the widest possible row of nc x nc blocks */
    PetscCall(PetscCalloc1(col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        slot   = i - gxs + gnx * (j - gys);

        /* Find block columns in block row */
        cnt = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
          }
        }
        /* indices are global after filtering, hence the non-Local MatSetValuesBlocked() */
        PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
        PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2016 
/*
  DMCreateMatrix_DA_3d_MPISBAIJ - Preallocates a symmetric block AIJ (SBAIJ) matrix for a 3d DMDA
  (one block row per grid point, block size nc); only the upper-triangular part of each block row
  is counted/inserted, using L2GFilterUpperTriangular() to drop lower-triangular entries after
  mapping to the global ordering.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the matrix */
  /* cols[] holds BLOCK column indices, so col^3 entries suffice (nc plays no role here) */
  PetscCall(PetscMalloc1(col * col * col, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* stencil extent per axis: full width if periodic, otherwise clipped at the physical boundary */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        /* local (ghosted) linear index of grid point (i,j,k) */
        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

        /* Find block columns in block row */
        cnt = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            for (kk = kstart; kk < kend + 1; kk++) {
              if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
            }
          }
        }
        /* after this call slot and cols[] are GLOBAL block indices, restricted to the upper triangle */
        PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
        PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* zero buffer reused for every insertion; sized for the widest possible row of nc x nc blocks */
    PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
              }
            }
          }
          /* indices are global after filtering, hence the non-Local MatSetValuesBlocked() */
          PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
          PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2122 
/*
  DMCreateMatrix_DA_3d_MPIAIJ_Fill - Preallocates and zero-fills the nonzero structure of an
  AIJ matrix for a 3d DMDA, honoring the per-component coupling patterns set with
  DMDASetBlockFills() (dd->dfill for the diagonal point, dd->ofill for off-diagonal stencil points).

  Input Parameters:
+ da - the DMDA (3d) describing the grid, stencil width/type, and boundary conditions
- J  - the matrix to preallocate and (unless da->prealloc_only) assemble with zero entries

  Notes:
  The work happens in two structurally identical passes over the owned grid points:
  the first pass counts column indices per row to build the dnz/onz preallocation arrays,
  the second inserts explicit zeros so the nonzero pattern is locked in
  (MAT_NEW_NONZERO_LOCATION_ERR is then set to catch later out-of-pattern insertions).
  dfill/ofill use the modified CSR layout built by DMDASetBlockFills_Private():
  entries [0..nc] are row starts, followed by the column indices themselves.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
  DM_DA                 *dd = (DM_DA *)da->data;
  PetscInt               ifill_col, *dfill = dd->dfill, *ofill = dd->ofill;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;

  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* worst case: a full box stencil (col^3 points) coupled to all nc components */
  PetscCall(PetscMalloc1(col * col * col * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);

  PetscCall(MatSetBlockSize(J, nc));
  for (i = xs; i < xs + nx; i++) {
    /* clip the stencil at physical (non-periodic) boundaries in each direction */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        /* local (ghosted) natural index of grid point (i,j,k) */
        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

        for (l = 0; l < nc; l++) { /* one matrix row per component at this grid point */
          cnt = 0;
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if (ii || jj || kk) {
                  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star */
                    /* off-diagonal grid point: couple only to components listed in ofill row l */
                    for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  }
                } else {
                  /* diagonal grid point: use dfill row l if given, otherwise full nc coupling */
                  if (dfill) {
                    for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  } else {
                    for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  }
                }
              }
            }
          }
          row    = l + nc * (slot);
          maxcnt = PetscMax(maxcnt, cnt); /* remembered so the zero-fill pass can size 'values' */
          if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
          else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
        }
      }
    }
  }
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* zero block large enough for the widest row found above */
    PetscCall(PetscCalloc1(maxcnt, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          /* regenerate exactly the same column lists as the counting pass and insert zeros */
          for (l = 0; l < nc; l++) {
            cnt = 0;
            for (ii = istart; ii < iend + 1; ii++) {
              for (jj = jstart; jj < jend + 1; jj++) {
                for (kk = kstart; kk < kend + 1; kk++) {
                  if (ii || jj || kk) {
                    if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star */
                      for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    }
                  } else {
                    if (dfill) {
                      for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    } else {
                      for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    }
                  }
                }
              }
            }
            row = l + nc * (slot);
            PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, values, INSERT_VALUES));
          }
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    /* the sparsity pattern is now fixed; any insertion outside it is an error */
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2263