xref: /petsc/src/dm/impls/da/fdda.c (revision 89928cc5142e867cb7b0dd1ff74e0acffcd6b4b5)
1 
2 #include <petsc/private/dmdaimpl.h> /*I      "petscdmda.h"     I*/
3 #include <petscmat.h>
4 #include <petscbt.h>
5 
6 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM, ISColoringType, ISColoring *);
7 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM, ISColoringType, ISColoring *);
8 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM, ISColoringType, ISColoring *);
9 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM, ISColoringType, ISColoring *);
10 
/*
   For a ghost index i that may be negative or greater than the upper bound m,
   map it into the 0:m-1 range using periodicity.

   Arguments are fully parenthesized so the macro expands correctly when passed
   expressions (e.g. SetInRange(i - 1, m)); callers must still avoid arguments
   with side effects since each argument may be evaluated more than once.
*/
#define SetInRange(i, m) (((i) < 0) ? (m) + (i) : (((i) >= (m)) ? (i) - (m) : (i)))
16 
17 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill, PetscInt w, PetscInt **rfill)
18 {
19   PetscInt i, j, nz, *fill;
20 
21   PetscFunctionBegin;
22   if (!dfill) PetscFunctionReturn(0);
23 
24   /* count number nonzeros */
25   nz = 0;
26   for (i = 0; i < w; i++) {
27     for (j = 0; j < w; j++) {
28       if (dfill[w * i + j]) nz++;
29     }
30   }
31   PetscCall(PetscMalloc1(nz + w + 1, &fill));
32   /* construct modified CSR storage of nonzero structure */
33   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
34    so fill[1] - fill[0] gives number of nonzeros in first row etc */
35   nz = w + 1;
36   for (i = 0; i < w; i++) {
37     fill[i] = nz;
38     for (j = 0; j < w; j++) {
39       if (dfill[w * i + j]) {
40         fill[nz] = j;
41         nz++;
42       }
43     }
44   }
45   fill[w] = nz;
46 
47   *rfill = fill;
48   PetscFunctionReturn(0);
49 }
50 
51 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse, PetscInt w, PetscInt **rfill)
52 {
53   PetscInt nz;
54 
55   PetscFunctionBegin;
56   if (!dfillsparse) PetscFunctionReturn(0);
57 
58   /* Determine number of non-zeros */
59   nz = (dfillsparse[w] - w - 1);
60 
61   /* Allocate space for our copy of the given sparse matrix representation. */
62   PetscCall(PetscMalloc1(nz + w + 1, rfill));
63   PetscCall(PetscArraycpy(*rfill, dfillsparse, nz + w + 1));
64   PetscFunctionReturn(0);
65 }
66 
67 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
68 {
69   PetscInt i, k, cnt = 1;
70 
71   PetscFunctionBegin;
72 
73   /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of
74    columns to the left with any nonzeros in them plus 1 */
75   PetscCall(PetscCalloc1(dd->w, &dd->ofillcols));
76   for (i = 0; i < dd->w; i++) {
77     for (k = dd->ofill[i]; k < dd->ofill[i + 1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
78   }
79   for (i = 0; i < dd->w; i++) {
80     if (dd->ofillcols[i]) dd->ofillcols[i] = cnt++;
81   }
82   PetscFunctionReturn(0);
83 }
84 
85 /*@
86     DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
87     of the matrix returned by DMCreateMatrix().
88 
89     Logically Collective on da
90 
91     Input Parameters:
92 +   da - the distributed array
93 .   dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
94 -   ofill - the fill pattern in the off-diagonal blocks
95 
96     Level: developer
97 
98     Notes:
99     This only makes sense when you are doing multicomponent problems but using the
100        MPIAIJ matrix format
101 
102            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
103        representing coupling and 0 entries for missing coupling. For example
104 $             dfill[9] = {1, 0, 0,
105 $                         1, 1, 0,
106 $                         0, 1, 1}
107        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
108        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
109        diagonal block).
110 
     DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns than
     can be represented in the dfill, ofill format
113 
114    Contributed by Glenn Hammond
115 
116 .seealso `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
117 
118 @*/
119 PetscErrorCode DMDASetBlockFills(DM da, const PetscInt *dfill, const PetscInt *ofill)
120 {
121   DM_DA *dd = (DM_DA *)da->data;
122 
123   PetscFunctionBegin;
124   /* save the given dfill and ofill information */
125   PetscCall(DMDASetBlockFills_Private(dfill, dd->w, &dd->dfill));
126   PetscCall(DMDASetBlockFills_Private(ofill, dd->w, &dd->ofill));
127 
128   /* count nonzeros in ofill columns */
129   PetscCall(DMDASetBlockFills_Private2(dd));
130   PetscFunctionReturn(0);
131 }
132 
133 /*@
134     DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
135     of the matrix returned by DMCreateMatrix(), using sparse representations
136     of fill patterns.
137 
138     Logically Collective on da
139 
140     Input Parameters:
141 +   da - the distributed array
142 .   dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block)
143 -   ofill - the sparse fill pattern in the off-diagonal blocks
144 
145     Level: developer
146 
147     Notes: This only makes sense when you are doing multicomponent problems but using the
148        MPIAIJ matrix format
149 
150            The format for dfill and ofill is a sparse representation of a
151            dof-by-dof matrix with 1 entries representing coupling and 0 entries
152            for missing coupling.  The sparse representation is a 1 dimensional
153            array of length nz + dof + 1, where nz is the number of non-zeros in
154            the matrix.  The first dof entries in the array give the
155            starting array indices of each row's items in the rest of the array,
156            the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
157            and the remaining nz items give the column indices of each of
158            the 1s within the logical 2D matrix.  Each row's items within
159            the array are the column indices of the 1s within that row
160            of the 2D matrix.  PETSc developers may recognize that this is the
161            same format as that computed by the DMDASetBlockFills_Private()
162            function from a dense 2D matrix representation.
163 
     DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns than
     can be represented in the dfill, ofill format
166 
167    Contributed by Philip C. Roth
168 
169 .seealso `DMDASetBlockFills()`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
170 
171 @*/
172 PetscErrorCode DMDASetBlockFillsSparse(DM da, const PetscInt *dfillsparse, const PetscInt *ofillsparse)
173 {
174   DM_DA *dd = (DM_DA *)da->data;
175 
176   PetscFunctionBegin;
177   /* save the given dfill and ofill information */
178   PetscCall(DMDASetBlockFillsSparse_Private(dfillsparse, dd->w, &dd->dfill));
179   PetscCall(DMDASetBlockFillsSparse_Private(ofillsparse, dd->w, &dd->ofill));
180 
181   /* count nonzeros in ofill columns */
182   PetscCall(DMDASetBlockFills_Private2(dd));
183   PetscFunctionReturn(0);
184 }
185 
/*
   DMCreateColoring_DA - Creates an ISColoring for the Jacobian of a DMDA-based
   problem by dispatching to the dimension-specific routines below.  For BAIJ
   matrix types the coloring is computed per grid point (block) rather than per
   matrix row, by temporarily pretending the DMDA has one component.
*/
PetscErrorCode DMCreateColoring_DA(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt       dim, m, n, p, nc;
  DMBoundaryType bx, by, bz;
  MPI_Comm       comm;
  PetscMPIInt    size;
  PetscBool      isBAIJ;
  DM_DA         *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
                                  m
          ------------------------------------------------------
         |                                                     |
         |                                                     |
         |               ----------------------                |
         |               |                    |                |
      n  |           yn  |                    |                |
         |               |                    |                |
         |               .---------------------                |
         |             (xs,ys)     xn                          |
         |            .                                        |
         |         (gxs,gys)                                   |
         |                                                     |
          -----------------------------------------------------
  */

  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  PetscCall(DMDAGetInfo(da, &dim, NULL, NULL, NULL, &m, &n, &p, &nc, NULL, &bx, &by, &bz, NULL));

  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (ctype == IS_COLORING_LOCAL) {
    if (size == 1) {
      /* on one process the ghosted (local) coloring coincides with the global one */
      ctype = IS_COLORING_GLOBAL;
    } else if (dim > 1) {
      /* a periodic direction of extent one makes a point its own neighbor, which
         the ghosted coloring below cannot represent */
      if ((m == 1 && bx == DM_BOUNDARY_PERIODIC) || (n == 1 && by == DM_BOUNDARY_PERIODIC) || (p == 1 && bz == DM_BOUNDARY_PERIODIC)) {
        SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain  on the same process");
      }
    }
  }

  /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
     matrices is for the blocks, not the individual matrix elements  */
  PetscCall(PetscStrbeginswith(da->mattype, MATBAIJ, &isBAIJ));
  if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATMPIBAIJ, &isBAIJ));
  if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATSEQBAIJ, &isBAIJ));
  if (isBAIJ) {
    /* temporarily rescale the component-scaled x-direction ranges to grid-point
       units; restored after the dispatch below, so the order here matters */
    dd->w  = 1;
    dd->xs = dd->xs / nc;
    dd->xe = dd->xe / nc;
    dd->Xs = dd->Xs / nc;
    dd->Xe = dd->Xe / nc;
  }

  /*
     We do not provide a getcoloring function in the DMDA operations because
   the basic DMDA does not know about matrices. We think of DMDA as being
   more low-level than matrices.
  */
  if (dim == 1) PetscCall(DMCreateColoring_DA_1d_MPIAIJ(da, ctype, coloring));
  else if (dim == 2) PetscCall(DMCreateColoring_DA_2d_MPIAIJ(da, ctype, coloring));
  else if (dim == 3) PetscCall(DMCreateColoring_DA_3d_MPIAIJ(da, ctype, coloring));
  else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not done for %" PetscInt_FMT " dimension, send us mail petsc-maint@mcs.anl.gov for code", dim);
  if (isBAIJ) {
    /* undo the BAIJ rescaling performed above */
    dd->w  = nc;
    dd->xs = dd->xs * nc;
    dd->xe = dd->xe * nc;
    dd->Xs = dd->Xs * nc;
    dd->Xe = dd->Xe * nc;
  }
  PetscFunctionReturn(0);
}
263 
264 /* ---------------------------------------------------------------------------------*/
265 
/*
   DMCreateColoring_DA_2d_MPIAIJ - Creates a coloring for a 2d DMDA by tiling
   the grid with col-by-col blocks of distinct colors (col = 2*s + 1), which
   separates any two grid points within stencil distance of each other.  The
   coloring is cached on the DMDA (dd->localcoloring / dd->ghostedcoloring)
   and reused on subsequent calls.
*/
PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, M, N, dim, s, k, nc, col;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx, by;
  DMDAStencilType  st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* special case as taught to us by Paul Hovland: a 5-point star stencil needs
     only 5 colors instead of the 9 the general scheme would use */
  if (st == DMDA_STENCIL_STAR && s == 1) {
    PetscCall(DMCreateColoring_DA_2d_5pt_MPIAIJ(da, ctype, coloring));
  } else {
    if (ctype == IS_COLORING_GLOBAL) {
      if (!dd->localcoloring) {
        PetscCall(PetscMalloc1(nc * nx * ny, &colors));
        ii = 0;
        /* color = component index + nc * (position within the col-by-col tile) */
        for (j = ys; j < ys + ny; j++) {
          for (i = xs; i < xs + nx; i++) {
            for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((i % col) + col * (j % col));
          }
        }
        /* maximum color value + 1 from the formula above */
        ncolors = nc + nc * (col - 1 + col * (col - 1));
        PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
      }
      *coloring = dd->localcoloring;
    } else if (ctype == IS_COLORING_LOCAL) {
      if (!dd->ghostedcoloring) {
        PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
        ii = 0;
        for (j = gys; j < gys + gny; j++) {
          for (i = gxs; i < gxs + gnx; i++) {
            for (k = 0; k < nc; k++) {
              /* the complicated stuff is to handle periodic boundaries: ghost
                 indices outside 0..m-1 / 0..n-1 are wrapped before tiling */
              colors[ii++] = k + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col));
            }
          }
        }
        ncolors = nc + nc * (col - 1 + col * (col - 1));
        PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
        /* PetscIntView(ncolors,(PetscInt*)colors,0); */

        PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
      }
      *coloring = dd->ghostedcoloring;
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  }
  /* the caller receives its own reference to the cached coloring */
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(0);
}
329 
330 /* ---------------------------------------------------------------------------------*/
331 
/*
   DMCreateColoring_DA_3d_MPIAIJ - Creates a coloring for a 3d DMDA by tiling
   the grid with col-by-col-by-col blocks of distinct colors (col = 2*s + 1),
   the 3d analogue of the 2d routine above.  The coloring is cached on the
   DMDA and reused on subsequent calls.
*/
PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, gxs, gys, gnx, gny, m, n, p, dim, s, k, nc, col, zs, gzs, ii, l, nz, gnz, M, N, P;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx, by, bz;
  DMDAStencilType  st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx * ny * nz, &colors));
      ii = 0;
      /* color = component index + nc * (position within the col^3 tile) */
      for (k = zs; k < zs + nz; k++) {
        for (j = ys; j < ys + ny; j++) {
          for (i = xs; i < xs + nx; i++) {
            for (l = 0; l < nc; l++) colors[ii++] = l + nc * ((i % col) + col * (j % col) + col * col * (k % col));
          }
        }
      }
      /* maximum color value + 1 from the formula above */
      ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny * nz, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx * gny * gnz, &colors));
      ii = 0;
      for (k = gzs; k < gzs + gnz; k++) {
        for (j = gys; j < gys + gny; j++) {
          for (i = gxs; i < gxs + gnx; i++) {
            for (l = 0; l < nc; l++) {
              /* the complicated stuff is to handle periodic boundaries: ghost
                 indices outside the global range are wrapped before tiling */
              colors[ii++] = l + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col) + col * col * (SetInRange(k, p) % col));
            }
          }
        }
      }
      ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny * gnz, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  /* the caller receives its own reference to the cached coloring */
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(0);
}
393 
394 /* ---------------------------------------------------------------------------------*/
395 
/*
   DMCreateColoring_DA_1d_MPIAIJ - Creates a coloring for a 1d DMDA.  The basic
   scheme tiles the line with runs of col = 2*s + 1 distinct colors; when an
   off-diagonal fill pattern was set via DMDASetBlockFills() a tighter scheme
   is used that only gives extra colors to the tc components that actually
   appear in some ofill column.  The coloring is cached on the DMDA.
*/
PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, nx, i, i1, gxs, gnx, l, m, M, dim, s, nc, col;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, &M, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx, &colors));
      if (dd->ofillcols) {
        PetscInt tc = 0;
        /* tc = number of components with any off-diagonal coupling */
        for (i = 0; i < nc; i++) tc += (PetscInt)(dd->ofillcols[i] > 0);
        i1 = 0;
        for (i = xs; i < xs + nx; i++) {
          for (l = 0; l < nc; l++) {
            if (dd->ofillcols[l] && (i % col)) {
              /* coupled component away from the tile base: give it one of the
                 extra colors; dd->ofillcols[l] is its 1-based rank among the
                 coupled components (set in DMDASetBlockFills_Private2).
                 NOTE(review): assumes this matches the fill-aware preallocation
                 in DMCreateMatrix_DA_1d_MPIAIJ_Fill -- confirm there */
              colors[i1++] = nc - 1 + tc * ((i % col) - 1) + dd->ofillcols[l];
            } else {
              /* uncoupled components (or tile base point) reuse colors 0..nc-1 */
              colors[i1++] = l;
            }
          }
        }
        ncolors = nc + 2 * s * tc;
      } else {
        i1 = 0;
        /* color = component index + nc * (position within the col-wide tile) */
        for (i = xs; i < xs + nx; i++) {
          for (l = 0; l < nc; l++) colors[i1++] = l + nc * (i % col);
        }
        ncolors = nc + nc * (col - 1);
      }
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx, &colors));
      i1 = 0;
      for (i = gxs; i < gxs + gnx; i++) {
        for (l = 0; l < nc; l++) {
          /* the complicated stuff is to handle periodic boundaries: ghost
             indices outside 0..m-1 are wrapped before tiling */
          colors[i1++] = l + nc * (SetInRange(i, m) % col);
        }
      }
      ncolors = nc + nc * (col - 1);
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  /* the caller receives its own reference to the cached coloring */
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(0);
}
464 
/*
   DMCreateColoring_DA_2d_5pt_MPIAIJ - Special 2d coloring for the 5-point star
   stencil (width 1): the pattern (3*j + i) mod 5 gives any point and its four
   neighbors five distinct colors, instead of the nine the general tiling would
   use.  Unlike the other creation routines, this one does not call
   ISColoringReference(); its caller DMCreateColoring_DA_2d_MPIAIJ takes the
   caller-side reference after dispatching here.
*/
PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, dim, s, k, nc;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx, by;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, NULL));
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx * ny, &colors));
      ii = 0;
      /* color = component index + nc * ((3*j + i) mod 5) */
      for (j = ys; j < ys + ny; j++) {
        for (i = xs; i < xs + nx; i++) {
          for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * j + i) % 5);
        }
      }
      ncolors = 5 * nc;
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
      ii = 0;
      /* ghost indices are wrapped into the global range to handle periodicity */
      for (j = gys; j < gys + gny; j++) {
        for (i = gxs; i < gxs + gnx; i++) {
          for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * SetInRange(j, n) + SetInRange(i, m)) % 5);
        }
      }
      ncolors = 5 * nc;
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  PetscFunctionReturn(0);
}
515 
516 /* =========================================================================== */
517 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM, Mat);
518 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM, Mat);
519 extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM, Mat);
520 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM, Mat);
521 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM, Mat);
522 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM, Mat);
523 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM, Mat);
524 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM, Mat);
525 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM, Mat);
526 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM, Mat);
527 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM, Mat);
528 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM, Mat);
529 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM, Mat);
530 extern PetscErrorCode DMCreateMatrix_DA_IS(DM, Mat);
531 
532 /*@C
533    MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix
534 
535    Logically Collective on mat
536 
537    Input Parameters:
538 +  mat - the matrix
539 -  da - the da
540 
541    Level: intermediate
542 
543 @*/
PetscErrorCode MatSetupDM(Mat mat, DM da)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscValidHeaderSpecificType(da, DM_CLASSID, 2, DMDA);
  /* dispatch to a matrix-type-specific "MatSetupDM_C" implementation if the
     matrix type provides one; otherwise this is a no-op */
  PetscTryMethod(mat, "MatSetupDM_C", (Mat, DM), (mat, da));
  PetscFunctionReturn(0);
}
552 
/*
   MatView_MPI_DA - Views a matrix that was created from a DMDA by first
   permuting it (via the DMDA's application ordering) into the natural grid
   ordering, so the output is independent of the parallel decomposition.
*/
PetscErrorCode MatView_MPI_DA(Mat A, PetscViewer viewer)
{
  DM                da;
  const char       *prefix;
  Mat               Anatural;
  AO                ao;
  PetscInt          rstart, rend, *petsc, i;
  IS                is;
  MPI_Comm          comm;
  PetscViewerFormat format;

  PetscFunctionBegin;
  /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
  PetscCall(PetscViewerGetFormat(viewer, &format));
  if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0);

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(MatGetDM(A, &da));
  PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");

  /* build the index set of this process's rows, translated from the natural
     (application) ordering to the PETSc ordering */
  PetscCall(DMDAGetAO(da, &ao));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
  PetscCall(PetscMalloc1(rend - rstart, &petsc));
  for (i = rstart; i < rend; i++) petsc[i - rstart] = i;
  PetscCall(AOApplicationToPetsc(ao, rend - rstart, petsc));
  /* the IS takes ownership of the petsc array (PETSC_OWN_POINTER) */
  PetscCall(ISCreateGeneral(comm, rend - rstart, petsc, PETSC_OWN_POINTER, &is));

  /* call viewer on natural ordering */
  PetscCall(MatCreateSubMatrix(A, is, is, MAT_INITIAL_MATRIX, &Anatural));
  PetscCall(ISDestroy(&is));
  PetscCall(PetscObjectGetOptionsPrefix((PetscObject)A, &prefix));
  PetscCall(PetscObjectSetOptionsPrefix((PetscObject)Anatural, prefix));
  PetscCall(PetscObjectSetName((PetscObject)Anatural, ((PetscObject)A)->name));
  /* temporarily suppress the class/name/prefix header so the temporary
     permuted matrix is printed as if it were A itself */
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
  PetscCall(MatView(Anatural, viewer));
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
  PetscCall(MatDestroy(&Anatural));
  PetscFunctionReturn(0);
}
592 
/*
   MatLoad_MPI_DA - Loads a matrix associated with a DMDA: the file is read in
   the natural grid ordering (the inverse of MatView_MPI_DA above) and then
   permuted into the DMDA's PETSc ordering, replacing A's contents in place.
*/
PetscErrorCode MatLoad_MPI_DA(Mat A, PetscViewer viewer)
{
  DM       da;
  Mat      Anatural, Aapp;
  AO       ao;
  PetscInt rstart, rend, *app, i, m, n, M, N;
  IS       is;
  MPI_Comm comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(MatGetDM(A, &da));
  PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");

  /* Load the matrix in natural ordering */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Anatural));
  PetscCall(MatSetType(Anatural, ((PetscObject)A)->type_name));
  PetscCall(MatGetSize(A, &M, &N));
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(MatSetSizes(Anatural, m, n, M, N));
  PetscCall(MatLoad(Anatural, viewer));

  /* Map natural ordering to application ordering and create IS */
  PetscCall(DMDAGetAO(da, &ao));
  PetscCall(MatGetOwnershipRange(Anatural, &rstart, &rend));
  PetscCall(PetscMalloc1(rend - rstart, &app));
  for (i = rstart; i < rend; i++) app[i - rstart] = i;
  PetscCall(AOPetscToApplication(ao, rend - rstart, app));
  /* the IS takes ownership of the app array (PETSC_OWN_POINTER) */
  PetscCall(ISCreateGeneral(comm, rend - rstart, app, PETSC_OWN_POINTER, &is));

  /* Do permutation and replace header: A keeps its identity (and references
     held by callers) but now holds the permuted data */
  PetscCall(MatCreateSubMatrix(Anatural, is, is, MAT_INITIAL_MATRIX, &Aapp));
  PetscCall(MatHeaderReplace(A, &Aapp));
  PetscCall(ISDestroy(&is));
  PetscCall(MatDestroy(&Anatural));
  PetscFunctionReturn(0);
}
630 
631 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
632 {
633   PetscInt dim, dof, nx, ny, nz, dims[3], starts[3], M, N, P;
634   Mat      A;
635   MPI_Comm comm;
636   MatType  Atype;
637   void (*aij)(void) = NULL, (*baij)(void) = NULL, (*sbaij)(void) = NULL, (*sell)(void) = NULL, (*is)(void) = NULL;
638   MatType     mtype;
639   PetscMPIInt size;
640   DM_DA      *dd = (DM_DA *)da->data;
641 
642   PetscFunctionBegin;
643   PetscCall(MatInitializePackage());
644   mtype = da->mattype;
645 
646   /*
647                                   m
648           ------------------------------------------------------
649          |                                                     |
650          |                                                     |
651          |               ----------------------                |
652          |               |                    |                |
653       n  |           ny  |                    |                |
654          |               |                    |                |
655          |               .---------------------                |
656          |             (xs,ys)     nx                          |
657          |            .                                        |
658          |         (gxs,gys)                                   |
659          |                                                     |
660           -----------------------------------------------------
661   */
662 
663   /*
664          nc - number of components per grid point
665          col - number of colors needed in one direction for single component problem
666 
667   */
668   M   = dd->M;
669   N   = dd->N;
670   P   = dd->P;
671   dim = da->dim;
672   dof = dd->w;
673   /* PetscCall(DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); */
674   PetscCall(DMDAGetCorners(da, NULL, NULL, NULL, &nx, &ny, &nz));
675   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
676   PetscCall(MatCreate(comm, &A));
677   PetscCall(MatSetSizes(A, dof * nx * ny * nz, dof * nx * ny * nz, dof * M * N * P, dof * M * N * P));
678   PetscCall(MatSetType(A, mtype));
679   PetscCall(MatSetFromOptions(A));
680   if (dof * nx * ny * nz < da->bind_below) {
681     PetscCall(MatSetBindingPropagates(A, PETSC_TRUE));
682     PetscCall(MatBindToCPU(A, PETSC_TRUE));
683   }
684   PetscCall(MatSetDM(A, da));
685   if (da->structure_only) PetscCall(MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE));
686   PetscCall(MatGetType(A, &Atype));
687   /*
688      We do not provide a getmatrix function in the DMDA operations because
689    the basic DMDA does not know about matrices. We think of DMDA as being more
690    more low-level than matrices. This is kind of cheating but, cause sometimes
691    we think of DMDA has higher level than matrices.
692 
693      We could switch based on Atype (or mtype), but we do not since the
694    specialized setting routines depend only on the particular preallocation
695    details of the matrix, not the type itself.
696   */
697   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJSetPreallocation_C", &aij));
698   if (!aij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJSetPreallocation_C", &aij));
699   if (!aij) {
700     PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIBAIJSetPreallocation_C", &baij));
701     if (!baij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", &baij));
702     if (!baij) {
703       PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISBAIJSetPreallocation_C", &sbaij));
704       if (!sbaij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSBAIJSetPreallocation_C", &sbaij));
705       if (!sbaij) {
706         PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", &sell));
707         if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSELLSetPreallocation_C", &sell));
708       }
709       if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatISSetPreallocation_C", &is));
710     }
711   }
712   if (aij) {
713     if (dim == 1) {
714       if (dd->ofill) {
715         PetscCall(DMCreateMatrix_DA_1d_MPIAIJ_Fill(da, A));
716       } else {
717         DMBoundaryType bx;
718         PetscMPIInt    size;
719         PetscCall(DMDAGetInfo(da, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &bx, NULL, NULL, NULL));
720         PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));
721         if (size == 1 && bx == DM_BOUNDARY_NONE) {
722           PetscCall(DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da, A));
723         } else {
724           PetscCall(DMCreateMatrix_DA_1d_MPIAIJ(da, A));
725         }
726       }
727     } else if (dim == 2) {
728       if (dd->ofill) {
729         PetscCall(DMCreateMatrix_DA_2d_MPIAIJ_Fill(da, A));
730       } else {
731         PetscCall(DMCreateMatrix_DA_2d_MPIAIJ(da, A));
732       }
733     } else if (dim == 3) {
734       if (dd->ofill) {
735         PetscCall(DMCreateMatrix_DA_3d_MPIAIJ_Fill(da, A));
736       } else {
737         PetscCall(DMCreateMatrix_DA_3d_MPIAIJ(da, A));
738       }
739     }
740   } else if (baij) {
741     if (dim == 2) {
742       PetscCall(DMCreateMatrix_DA_2d_MPIBAIJ(da, A));
743     } else if (dim == 3) {
744       PetscCall(DMCreateMatrix_DA_3d_MPIBAIJ(da, A));
745     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
746   } else if (sbaij) {
747     if (dim == 2) {
748       PetscCall(DMCreateMatrix_DA_2d_MPISBAIJ(da, A));
749     } else if (dim == 3) {
750       PetscCall(DMCreateMatrix_DA_3d_MPISBAIJ(da, A));
751     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
752   } else if (sell) {
753     if (dim == 2) {
754       PetscCall(DMCreateMatrix_DA_2d_MPISELL(da, A));
755     } else if (dim == 3) {
756       PetscCall(DMCreateMatrix_DA_3d_MPISELL(da, A));
757     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
758   } else if (is) {
759     PetscCall(DMCreateMatrix_DA_IS(da, A));
760   } else {
761     ISLocalToGlobalMapping ltog;
762 
763     PetscCall(MatSetBlockSize(A, dof));
764     PetscCall(MatSetUp(A));
765     PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
766     PetscCall(MatSetLocalToGlobalMapping(A, ltog, ltog));
767   }
768   PetscCall(DMDAGetGhostCorners(da, &starts[0], &starts[1], &starts[2], &dims[0], &dims[1], &dims[2]));
769   PetscCall(MatSetStencil(A, dim, dims, starts, dof));
770   PetscCall(MatSetDM(A, da));
771   PetscCallMPI(MPI_Comm_size(comm, &size));
772   if (size > 1) {
773     /* change viewer to display matrix in natural ordering */
774     PetscCall(MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA));
775     PetscCall(MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA));
776   }
777   *J = A;
778   PetscFunctionReturn(0);
779 }
780 
781 /* ---------------------------------------------------------------------------------*/
782 PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]);
783 
/* Builds the structure of a MATIS matrix on a DMDA: constructs a local-to-global
   map restricted to the nodes actually referenced by the local elements, then
   preallocates the unassembled local block by replaying the element connectivity
   through a MATPREALLOCATOR helper matrix. */
PetscErrorCode DMCreateMatrix_DA_IS(DM dm, Mat J)
{
  DM_DA                 *da = (DM_DA *)dm->data;
  Mat                    lJ, P; /* local block of J; MATPREALLOCATOR helper */
  ISLocalToGlobalMapping ltog;
  IS                     is;
  PetscBT                bt; /* bit array flagging which (block) nodes the local elements touch */
  const PetscInt        *e_loc, *idx;
  PetscInt               i, nel, nen, nv, dof, *gidx, n, N;

  /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
     We need to filter out the local indices that are not represented through the DMDAGetElements decomposition */
  PetscFunctionBegin;
  dof = da->w;
  PetscCall(MatSetBlockSize(J, dof));
  PetscCall(DMGetLocalToGlobalMapping(dm, &ltog));

  /* flag local elements indices in local DMDA numbering */
  PetscCall(ISLocalToGlobalMappingGetSize(ltog, &nv));
  PetscCall(PetscBTCreate(nv / dof, &bt));
  PetscCall(DMDAGetElements(dm, &nel, &nen, &e_loc)); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
  for (i = 0; i < nel * nen; i++) PetscCall(PetscBTSet(bt, e_loc[i]));

  /* filter out (set to -1) the global indices not used by the local elements */
  PetscCall(PetscMalloc1(nv / dof, &gidx));
  PetscCall(ISLocalToGlobalMappingGetBlockIndices(ltog, &idx));
  PetscCall(PetscArraycpy(gidx, idx, nv / dof));
  PetscCall(ISLocalToGlobalMappingRestoreBlockIndices(ltog, &idx));
  for (i = 0; i < nv / dof; i++)
    if (!PetscBTLookup(bt, i)) gidx[i] = -1;
  PetscCall(PetscBTDestroy(&bt));
  /* the IS takes ownership of gidx (PETSC_OWN_POINTER); do not free it here.
     ltog is rebound to a newly created map, shadowing the borrowed DM map above */
  PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)dm), dof, nv / dof, gidx, PETSC_OWN_POINTER, &is));
  PetscCall(ISLocalToGlobalMappingCreateIS(is, &ltog));
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
  PetscCall(ISLocalToGlobalMappingDestroy(&ltog));
  PetscCall(ISDestroy(&is));

  /* Preallocation */
  if (dm->prealloc_skip) {
    PetscCall(MatSetUp(J));
  } else {
    /* insert the element blocks once into a MATPREALLOCATOR of the same shape and
       map as the local matrix, then use it to preallocate (and, unless
       prealloc_only is set, also fill the nonzero structure of) lJ */
    PetscCall(MatISGetLocalMat(J, &lJ));
    PetscCall(MatGetLocalToGlobalMapping(lJ, &ltog, NULL));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)lJ), &P));
    PetscCall(MatSetType(P, MATPREALLOCATOR));
    PetscCall(MatSetLocalToGlobalMapping(P, ltog, ltog));
    PetscCall(MatGetSize(lJ, &N, NULL));
    PetscCall(MatGetLocalSize(lJ, &n, NULL));
    PetscCall(MatSetSizes(P, n, n, N, N));
    PetscCall(MatSetBlockSize(P, dof));
    PetscCall(MatSetUp(P));
    /* NULL values: only the nonzero pattern of each element block is recorded */
    for (i = 0; i < nel; i++) PetscCall(MatSetValuesBlockedLocal(P, nen, e_loc + i * nen, nen, e_loc + i * nen, NULL, INSERT_VALUES));
    PetscCall(MatPreallocatorPreallocate(P, (PetscBool)!da->prealloc_only, lJ));
    PetscCall(MatISRestoreLocalMat(J, &lJ));
    PetscCall(DMDARestoreElements(dm, &nel, &nen, &e_loc));
    PetscCall(MatDestroy(&P));

    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
  }
  PetscFunctionReturn(0);
}
846 
847 PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da, Mat J)
848 {
849   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p;
850   PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
851   MPI_Comm               comm;
852   PetscScalar           *values;
853   DMBoundaryType         bx, by;
854   ISLocalToGlobalMapping ltog;
855   DMDAStencilType        st;
856 
857   PetscFunctionBegin;
858   /*
859          nc - number of components per grid point
860          col - number of colors needed in one direction for single component problem
861 
862   */
863   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
864   col = 2 * s + 1;
865   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
866   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
867   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
868 
869   PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
870   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
871 
872   PetscCall(MatSetBlockSize(J, nc));
873   /* determine the matrix preallocation information */
874   MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
875   for (i = xs; i < xs + nx; i++) {
876     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
877     pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
878 
879     for (j = ys; j < ys + ny; j++) {
880       slot = i - gxs + gnx * (j - gys);
881 
882       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
883       lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
884 
885       cnt = 0;
886       for (k = 0; k < nc; k++) {
887         for (l = lstart; l < lend + 1; l++) {
888           for (p = pstart; p < pend + 1; p++) {
889             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
890               cols[cnt++] = k + nc * (slot + gnx * l + p);
891             }
892           }
893         }
894         rows[k] = k + nc * (slot);
895       }
896       PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
897     }
898   }
899   PetscCall(MatSetBlockSize(J, nc));
900   PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
901   PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
902   MatPreallocateEnd(dnz, onz);
903 
904   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
905 
906   /*
907     For each node in the grid: we get the neighbors in the local (on processor ordering
908     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
909     PETSc ordering.
910   */
911   if (!da->prealloc_only) {
912     PetscCall(PetscCalloc1(col * col * nc * nc, &values));
913     for (i = xs; i < xs + nx; i++) {
914       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
915       pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
916 
917       for (j = ys; j < ys + ny; j++) {
918         slot = i - gxs + gnx * (j - gys);
919 
920         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
921         lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
922 
923         cnt = 0;
924         for (k = 0; k < nc; k++) {
925           for (l = lstart; l < lend + 1; l++) {
926             for (p = pstart; p < pend + 1; p++) {
927               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
928                 cols[cnt++] = k + nc * (slot + gnx * l + p);
929               }
930             }
931           }
932           rows[k] = k + nc * (slot);
933         }
934         PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
935       }
936     }
937     PetscCall(PetscFree(values));
938     /* do not copy values to GPU since they are all zero and not yet needed there */
939     PetscCall(MatBindToCPU(J, PETSC_TRUE));
940     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
941     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
942     PetscCall(MatBindToCPU(J, PETSC_FALSE));
943     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
944   }
945   PetscCall(PetscFree2(rows, cols));
946   PetscFunctionReturn(0);
947 }
948 
949 PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da, Mat J)
950 {
951   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
952   PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
953   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
954   MPI_Comm               comm;
955   PetscScalar           *values;
956   DMBoundaryType         bx, by, bz;
957   ISLocalToGlobalMapping ltog;
958   DMDAStencilType        st;
959 
960   PetscFunctionBegin;
961   /*
962          nc - number of components per grid point
963          col - number of colors needed in one direction for single component problem
964 
965   */
966   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
967   col = 2 * s + 1;
968   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
969   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
970   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
971 
972   PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
973   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
974 
975   PetscCall(MatSetBlockSize(J, nc));
976   /* determine the matrix preallocation information */
977   MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
978   for (i = xs; i < xs + nx; i++) {
979     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
980     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
981     for (j = ys; j < ys + ny; j++) {
982       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
983       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
984       for (k = zs; k < zs + nz; k++) {
985         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
986         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
987 
988         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
989 
990         cnt = 0;
991         for (l = 0; l < nc; l++) {
992           for (ii = istart; ii < iend + 1; ii++) {
993             for (jj = jstart; jj < jend + 1; jj++) {
994               for (kk = kstart; kk < kend + 1; kk++) {
995                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
996                   cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
997                 }
998               }
999             }
1000           }
1001           rows[l] = l + nc * (slot);
1002         }
1003         PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1004       }
1005     }
1006   }
1007   PetscCall(MatSetBlockSize(J, nc));
1008   PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
1009   PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
1010   MatPreallocateEnd(dnz, onz);
1011   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1012 
1013   /*
1014     For each node in the grid: we get the neighbors in the local (on processor ordering
1015     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1016     PETSc ordering.
1017   */
1018   if (!da->prealloc_only) {
1019     PetscCall(PetscCalloc1(col * col * col * nc * nc * nc, &values));
1020     for (i = xs; i < xs + nx; i++) {
1021       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1022       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1023       for (j = ys; j < ys + ny; j++) {
1024         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1025         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1026         for (k = zs; k < zs + nz; k++) {
1027           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1028           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1029 
1030           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1031 
1032           cnt = 0;
1033           for (l = 0; l < nc; l++) {
1034             for (ii = istart; ii < iend + 1; ii++) {
1035               for (jj = jstart; jj < jend + 1; jj++) {
1036                 for (kk = kstart; kk < kend + 1; kk++) {
1037                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1038                     cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
1039                   }
1040                 }
1041               }
1042             }
1043             rows[l] = l + nc * (slot);
1044           }
1045           PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
1046         }
1047       }
1048     }
1049     PetscCall(PetscFree(values));
1050     /* do not copy values to GPU since they are all zero and not yet needed there */
1051     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1052     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1053     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1054     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1055     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1056   }
1057   PetscCall(PetscFree2(rows, cols));
1058   PetscFunctionReturn(0);
1059 }
1060 
/* Preallocates (and, unless prealloc_only is set, fills with zeros) an AIJ
   matrix for a 2d DMDA, coupling all nc degrees of freedom at each pair of
   grid points connected by the stencil. */
PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, M, N;
  PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
  MPI_Comm               comm;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog, mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE, alreadyboundtocpu = PETSC_TRUE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  /* without periodic wrap each row is inserted in full with ascending column order
     (see the loop order in the insertion pass below), so the assembly can take the
     MAT_SORTED_FULL fast path; the option is turned off again after assembly */
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
  col = 2 * s + 1;
  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* stencil clipped at physical boundaries; periodic wraps are kept */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      slot = i - gxs + gnx * (j - gys); /* node index in the local ghosted ordering */

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      cnt = 0;
      for (k = 0; k < nc; k++) {
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc * (slot + gnx * l + p);
            }
          }
        }
        rows[k] = k + nc * (slot);
      }
      /* with wrap-around duplicates the column list must be deduplicated before counting */
      if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  /* only install the DM map if the matrix does not already carry one */
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        cnt = 0;
        /* unlike the counting pass, components are innermost here so the local
           column indices come out in increasing order */
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = nc * (slot + gnx * l + p);
              for (k = 1; k < nc; k++) {
                cols[cnt] = 1 + cols[cnt - 1];
                cnt++;
              }
            }
          }
        }
        for (k = 0; k < nc; k++) rows[k] = k + nc * (slot);
        /* NULL values: inserts explicit zeros in the computed sparsity pattern */
        PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBoundToCPU(J, &alreadyboundtocpu));
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (!alreadyboundtocpu) PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(0);
}
1172 
/* Preallocates (and, unless prealloc_only is set, fills with zeros) an AIJ
   matrix for a 2d DMDA where the couplings between the nc components are
   restricted by the user-supplied fill patterns: dfill for the diagonal
   (same grid point) block and ofill for the off-diagonal (neighboring point)
   blocks.  Both use the modified CSR layout built by
   DMDASetBlockFills_Private(): entries 0..nc mark the start of each row of
   column indices (and the end of the last row).  A NULL dfill means a dense
   diagonal block. */
PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, M, N;
  PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
  DM_DA                 *dd = (DM_DA *)da->data;
  PetscInt               ifill_col, *ofill = dd->ofill, *dfill = dd->dfill;
  MPI_Comm               comm;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;
  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc1(col * col * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* stencil clipped at physical boundaries; periodic wraps are kept */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      slot = i - gxs + gnx * (j - gys); /* node index in the local ghosted ordering */

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      /* one matrix row per component k: off-point couplings come from ofill,
         same-point couplings from dfill (or all nc components when dfill is NULL) */
      for (k = 0; k < nc; k++) {
        cnt = 0;
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if (l || p) {
              if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
                for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
              }
            } else {
              if (dfill) {
                for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
              } else {
                for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
              }
            }
          }
        }
        row    = k + nc * (slot);
        maxcnt = PetscMax(maxcnt, cnt);
        /* with wrap-around duplicates the column list must be deduplicated before counting */
        if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
        else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        /* same column generation as the counting pass above */
        for (k = 0; k < nc; k++) {
          cnt = 0;
          for (l = lstart; l < lend + 1; l++) {
            for (p = pstart; p < pend + 1; p++) {
              if (l || p) {
                if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
                  for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
                }
              } else {
                if (dfill) {
                  for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
                } else {
                  for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
                }
              }
            }
          }
          row = k + nc * (slot);
          /* NULL values: inserts explicit zeros in the computed sparsity pattern */
          PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        }
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(0);
}
1297 
1298 /* ---------------------------------------------------------------------------------*/
1299 
/* Preallocates (and, unless prealloc_only is set, fills with zeros) an AIJ
   matrix for a 3d DMDA, coupling all nc degrees of freedom at each pair of
   grid points connected by the stencil. */
PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
  MPI_Comm               comm;
  DMBoundaryType         bx, by, bz;
  ISLocalToGlobalMapping ltog, mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  /* without periodic wrap each row is inserted in full with ascending column order
     (see the loop order in the insertion pass below), so the assembly can take the
     MAT_SORTED_FULL fast path; the option is turned off again after assembly */
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
  col = 2 * s + 1;

  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* stencil clipped at physical boundaries; periodic wraps are kept */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); /* node index in the local ghosted ordering */

        cnt = 0;
        for (l = 0; l < nc; l++) {
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                }
              }
            }
          }
          rows[l] = l + nc * (slot);
        }
        /* with wrap-around duplicates the column list must be deduplicated before counting */
        if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
        else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  /* only install the DM map if the matrix does not already carry one */
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          /* unlike the counting pass, the loop nest here runs z outermost and x
             innermost with components innermost of all, so the local column
             indices come out in increasing order */
          for (kk = kstart; kk < kend + 1; kk++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (ii = istart; ii < iend + 1; ii++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  for (l = 1; l < nc; l++) {
                    cols[cnt] = 1 + cols[cnt - 1];
                    cnt++;
                  }
                }
              }
            }
          }
          rows[0] = nc * (slot);
          for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
          /* NULL values: inserts explicit zeros in the computed sparsity pattern */
          PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
        }
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(0);
}
1425 
1426 /* ---------------------------------------------------------------------------------*/
1427 
1428 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da, Mat J)
1429 {
1430   DM_DA                 *dd = (DM_DA *)da->data;
1431   PetscInt               xs, nx, i, j, gxs, gnx, row, k, l;
1432   PetscInt               m, dim, s, *cols = NULL, nc, cnt, maxcnt = 0, *ocols;
1433   PetscInt              *ofill = dd->ofill, *dfill = dd->dfill;
1434   DMBoundaryType         bx;
1435   ISLocalToGlobalMapping ltog;
1436   PetscMPIInt            rank, size;
1437 
1438   PetscFunctionBegin;
1439   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)da), &rank));
1440   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));
1441 
1442   /*
1443          nc - number of components per grid point
1444 
1445   */
1446   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1447   PetscCheck(s <= 1, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Matrix creation for 1d not implemented correctly for stencil width larger than 1");
1448   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1449   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1450 
1451   PetscCall(MatSetBlockSize(J, nc));
1452   PetscCall(PetscCalloc2(nx * nc, &cols, nx * nc, &ocols));
1453 
1454   /*
1455         note should be smaller for first and last process with no periodic
1456         does not handle dfill
1457   */
1458   cnt = 0;
1459   /* coupling with process to the left */
1460   for (i = 0; i < s; i++) {
1461     for (j = 0; j < nc; j++) {
1462       ocols[cnt] = ((rank == 0) ? 0 : (s - i) * (ofill[j + 1] - ofill[j]));
1463       cols[cnt]  = dfill[j + 1] - dfill[j] + (s + i) * (ofill[j + 1] - ofill[j]);
1464       if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1465         if (size > 1) ocols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
1466         else cols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
1467       }
1468       maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1469       cnt++;
1470     }
1471   }
1472   for (i = s; i < nx - s; i++) {
1473     for (j = 0; j < nc; j++) {
1474       cols[cnt] = dfill[j + 1] - dfill[j] + 2 * s * (ofill[j + 1] - ofill[j]);
1475       maxcnt    = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1476       cnt++;
1477     }
1478   }
1479   /* coupling with process to the right */
1480   for (i = nx - s; i < nx; i++) {
1481     for (j = 0; j < nc; j++) {
1482       ocols[cnt] = ((rank == (size - 1)) ? 0 : (i - nx + s + 1) * (ofill[j + 1] - ofill[j]));
1483       cols[cnt]  = dfill[j + 1] - dfill[j] + (s + nx - i - 1) * (ofill[j + 1] - ofill[j]);
1484       if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1485         if (size > 1) ocols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
1486         else cols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
1487       }
1488       maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1489       cnt++;
1490     }
1491   }
1492 
1493   PetscCall(MatSeqAIJSetPreallocation(J, 0, cols));
1494   PetscCall(MatMPIAIJSetPreallocation(J, 0, cols, 0, ocols));
1495   PetscCall(PetscFree2(cols, ocols));
1496 
1497   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1498   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1499 
1500   /*
1501     For each node in the grid: we get the neighbors in the local (on processor ordering
1502     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1503     PETSc ordering.
1504   */
1505   if (!da->prealloc_only) {
1506     PetscCall(PetscMalloc1(maxcnt, &cols));
1507     row = xs * nc;
1508     /* coupling with process to the left */
1509     for (i = xs; i < xs + s; i++) {
1510       for (j = 0; j < nc; j++) {
1511         cnt = 0;
1512         if (rank) {
1513           for (l = 0; l < s; l++) {
1514             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1515           }
1516         }
1517         if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1518           for (l = 0; l < s; l++) {
1519             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (m + i - s - l) * nc + ofill[k];
1520           }
1521         }
1522         if (dfill) {
1523           for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1524         } else {
1525           for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1526         }
1527         for (l = 0; l < s; l++) {
1528           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1529         }
1530         PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1531         row++;
1532       }
1533     }
1534     for (i = xs + s; i < xs + nx - s; i++) {
1535       for (j = 0; j < nc; j++) {
1536         cnt = 0;
1537         for (l = 0; l < s; l++) {
1538           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1539         }
1540         if (dfill) {
1541           for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1542         } else {
1543           for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1544         }
1545         for (l = 0; l < s; l++) {
1546           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1547         }
1548         PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1549         row++;
1550       }
1551     }
1552     /* coupling with process to the right */
1553     for (i = xs + nx - s; i < xs + nx; i++) {
1554       for (j = 0; j < nc; j++) {
1555         cnt = 0;
1556         for (l = 0; l < s; l++) {
1557           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1558         }
1559         if (dfill) {
1560           for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1561         } else {
1562           for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1563         }
1564         if (rank < size - 1) {
1565           for (l = 0; l < s; l++) {
1566             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1567           }
1568         }
1569         if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1570           for (l = 0; l < s; l++) {
1571             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s - l - m + 2) * nc + ofill[k];
1572           }
1573         }
1574         PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1575         row++;
1576       }
1577     }
1578     PetscCall(PetscFree(cols));
1579     /* do not copy values to GPU since they are all zero and not yet needed there */
1580     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1581     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1582     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1583     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1584     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1585   }
1586   PetscFunctionReturn(0);
1587 }
1588 
1589 /* ---------------------------------------------------------------------------------*/
1590 
1591 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da, Mat J)
1592 {
1593   PetscInt               xs, nx, i, i1, slot, gxs, gnx;
1594   PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1595   PetscInt               istart, iend;
1596   DMBoundaryType         bx;
1597   ISLocalToGlobalMapping ltog, mltog;
1598 
1599   PetscFunctionBegin;
1600   /*
1601          nc - number of components per grid point
1602          col - number of colors needed in one direction for single component problem
1603 
1604   */
1605   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1606   if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1607   col = 2 * s + 1;
1608 
1609   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1610   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1611 
1612   PetscCall(MatSetBlockSize(J, nc));
1613   PetscCall(MatSeqAIJSetPreallocation(J, col * nc, NULL));
1614   PetscCall(MatMPIAIJSetPreallocation(J, col * nc, NULL, col * nc, NULL));
1615 
1616   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1617   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1618   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1619 
1620   /*
1621     For each node in the grid: we get the neighbors in the local (on processor ordering
1622     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1623     PETSc ordering.
1624   */
1625   if (!da->prealloc_only) {
1626     PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
1627     for (i = xs; i < xs + nx; i++) {
1628       istart = PetscMax(-s, gxs - i);
1629       iend   = PetscMin(s, gxs + gnx - i - 1);
1630       slot   = i - gxs;
1631 
1632       cnt = 0;
1633       for (i1 = istart; i1 < iend + 1; i1++) {
1634         cols[cnt++] = nc * (slot + i1);
1635         for (l = 1; l < nc; l++) {
1636           cols[cnt] = 1 + cols[cnt - 1];
1637           cnt++;
1638         }
1639       }
1640       rows[0] = nc * (slot);
1641       for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1642       PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1643     }
1644     /* do not copy values to GPU since they are all zero and not yet needed there */
1645     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1646     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1647     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1648     if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1649     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1650     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1651     PetscCall(PetscFree2(rows, cols));
1652   }
1653   PetscFunctionReturn(0);
1654 }
1655 
1656 /* ---------------------------------------------------------------------------------*/
1657 
1658 PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da, Mat J)
1659 {
1660   PetscInt               xs, nx, i, i1, slot, gxs, gnx;
1661   PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1662   PetscInt               istart, iend;
1663   DMBoundaryType         bx;
1664   ISLocalToGlobalMapping ltog, mltog;
1665 
1666   PetscFunctionBegin;
1667   /*
1668          nc - number of components per grid point
1669          col - number of colors needed in one direction for single component problem
1670   */
1671   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1672   col = 2 * s + 1;
1673 
1674   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1675   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1676 
1677   PetscCall(MatSetBlockSize(J, nc));
1678   PetscCall(MatSeqAIJSetTotalPreallocation(J, nx * nc * col * nc));
1679 
1680   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1681   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1682   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1683 
1684   /*
1685     For each node in the grid: we get the neighbors in the local (on processor ordering
1686     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1687     PETSc ordering.
1688   */
1689   if (!da->prealloc_only) {
1690     PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
1691     for (i = xs; i < xs + nx; i++) {
1692       istart = PetscMax(-s, gxs - i);
1693       iend   = PetscMin(s, gxs + gnx - i - 1);
1694       slot   = i - gxs;
1695 
1696       cnt = 0;
1697       for (i1 = istart; i1 < iend + 1; i1++) {
1698         cols[cnt++] = nc * (slot + i1);
1699         for (l = 1; l < nc; l++) {
1700           cols[cnt] = 1 + cols[cnt - 1];
1701           cnt++;
1702         }
1703       }
1704       rows[0] = nc * (slot);
1705       for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1706       PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1707     }
1708     /* do not copy values to GPU since they are all zero and not yet needed there */
1709     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1710     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1711     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1712     if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1713     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1714     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1715     PetscCall(PetscFree2(rows, cols));
1716   }
1717   PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1718   PetscFunctionReturn(0);
1719 }
1720 
/*
  DMCreateMatrix_DA_2d_MPIBAIJ - Builds the BAIJ matrix nonzero structure for a 2d DMDA
  with nc degrees of freedom per grid point (block size nc).

  Two passes over the locally owned grid points:
    1) count the block columns of each block row to compute the preallocation;
    2) unless da->prealloc_only is set, insert explicit zero blocks so the nonzero
       pattern is fully assembled and MAT_NEW_NONZERO_LOCATION_ERR can be enabled.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, ii, jj;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc1(col * col * nc * nc, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* periodic boundaries keep the full stencil; otherwise clip it at the physical domain edge */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      slot   = i - gxs + gnx * (j - gys); /* local (ghosted) index of grid point (i,j) */

      /* Find block columns in block row */
      cnt = 0;
      for (ii = istart; ii < iend + 1; ii++) {
        for (jj = jstart; jj < jend + 1; jj++) {
          if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
            cols[cnt++] = slot + ii + gnx * jj;
          }
        }
      }
      PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* zero block used for every insertion; only the nonzero locations matter here */
    PetscCall(PetscCalloc1(col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        slot   = i - gxs + gnx * (j - gys);
        cnt    = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
              cols[cnt++] = slot + ii + gnx * jj;
            }
          }
        }
        PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(0);
}
1812 
/*
  DMCreateMatrix_DA_3d_MPIBAIJ - Builds the BAIJ matrix nonzero structure for a 3d DMDA
  with nc degrees of freedom per grid point (block size nc).

  Two passes over the locally owned grid points:
    1) count the block columns of each block row to compute the preallocation;
    2) unless da->prealloc_only is set, insert explicit zero blocks so the nonzero
       pattern is fully assembled and MAT_NEW_NONZERO_LOCATION_ERR can be enabled.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* at most col^3 block columns per block row (BOX stencil) */
  PetscCall(PetscMalloc1(col * col * col, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* periodic boundaries keep the full stencil; otherwise clip it at the physical domain edge */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); /* local (ghosted) index of (i,j,k) */

        /* Find block columns in block row */
        cnt = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            for (kk = kstart; kk < kend + 1; kk++) {
              if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
              }
            }
          }
        }
        PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* zero block used for every insertion; only the nonzero locations matter here */
    PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
                }
              }
            }
          }
          PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(0);
}
1920 
1921 /*
1922   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
1923   identify in the local ordering with periodic domain.
1924 */
1925 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog, PetscInt *row, PetscInt *cnt, PetscInt col[])
1926 {
1927   PetscInt i, n;
1928 
1929   PetscFunctionBegin;
1930   PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, 1, row, row));
1931   PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, *cnt, col, col));
1932   for (i = 0, n = 0; i < *cnt; i++) {
1933     if (col[i] >= *row) col[n++] = col[i];
1934   }
1935   *cnt = n;
1936   PetscFunctionReturn(0);
1937 }
1938 
/*
  DMCreateMatrix_DA_2d_MPISBAIJ - Builds the symmetric SBAIJ matrix nonzero structure
  for a 2d DMDA with nc degrees of freedom per grid point (block size nc).

  Same two-pass structure as the BAIJ case, except the candidate block columns are
  mapped to global indices and filtered to the upper triangle (col >= row) by
  L2GFilterUpperTriangular() before preallocation/insertion, since SBAIJ stores
  only the upper-triangular blocks.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, ii, jj;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc1(col * col * nc * nc, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* periodic boundaries keep the full stencil; otherwise clip it at the physical domain edge */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      slot   = i - gxs + gnx * (j - gys); /* local (ghosted) index of grid point (i,j) */

      /* Find block columns in block row */
      cnt = 0;
      for (ii = istart; ii < iend + 1; ii++) {
        for (jj = jstart; jj < jend + 1; jj++) {
          if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
        }
      }
      /* maps slot/cols to global indices and drops lower-triangular entries */
      PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
      PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* zero block used for every insertion; only the nonzero locations matter here */
    PetscCall(PetscCalloc1(col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        slot   = i - gxs + gnx * (j - gys);

        /* Find block columns in block row */
        cnt = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
          }
        }
        PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
        /* indices are global after filtering, so use MatSetValuesBlocked (not ...Local) */
        PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(0);
}
2030 
/*
  DMCreateMatrix_DA_3d_MPISBAIJ - Builds the symmetric SBAIJ matrix nonzero structure
  for a 3d DMDA with nc degrees of freedom per grid point (block size nc).

  Same two-pass structure as the BAIJ case, except the candidate block columns are
  mapped to global indices and filtered to the upper triangle (col >= row) by
  L2GFilterUpperTriangular() before preallocation/insertion, since SBAIJ stores
  only the upper-triangular blocks.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the matrix */
  PetscCall(PetscMalloc1(col * col * col, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* periodic boundaries keep the full stencil; otherwise clip it at the physical domain edge */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); /* local (ghosted) index of (i,j,k) */

        /* Find block columns in block row */
        cnt = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            for (kk = kstart; kk < kend + 1; kk++) {
              if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
            }
          }
        }
        /* maps slot/cols to global indices and drops lower-triangular entries */
        PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
        PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* zero block used for every insertion; only the nonzero locations matter here */
    PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
              }
            }
          }
          PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
          /* indices are global after filtering, so use MatSetValuesBlocked (not ...Local) */
          PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(0);
}
2136 
2137 /* ---------------------------------------------------------------------------------*/
2138 
/*
   Builds the nonzero structure of the 3d DMDA (Seq/MPI)AIJ matrix J restricted by the
  user-provided block fill patterns: dfill describes the couplings between the nc
  components at the same grid point and ofill the couplings to components at
  neighboring grid points. Both are stored in the modified CSR format built by
  DMDASetBlockFills_Private(): entries fill[l] .. fill[l+1]-1 are the column
  indices coupled to component (row) l.

   The owned grid points are swept twice: the first pass counts the diagonal and
  off-diagonal nonzeros of each local row to preallocate J; the second pass
  (skipped when da->prealloc_only is set) inserts explicit zeros to lock in the
  nonzero pattern.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
  DM_DA                 *dd = (DM_DA *)da->data;
  PetscInt               ifill_col, *dfill = dd->dfill, *ofill = dd->ofill;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;
  PetscCheck(bx != DM_BOUNDARY_PERIODIC || (m % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in X is divisible\n\
                 by 2*stencil_width + 1\n");
  PetscCheck(by != DM_BOUNDARY_PERIODIC || (n % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Y is divisible\n\
                 by 2*stencil_width + 1\n");
  PetscCheck(bz != DM_BOUNDARY_PERIODIC || (p % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Z is divisible\n\
                 by 2*stencil_width + 1\n");

  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* scratch for the column indices of one matrix row: at most col^3 stencil points, each contributing up to nc entries */
  PetscCall(PetscMalloc1(col * col * col * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);

  PetscCall(MatSetBlockSize(J, nc));
  for (i = xs; i < xs + nx; i++) {
    /* stencil extent in each direction: full width when periodic, otherwise clipped at the physical boundary */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        /* local (ghosted) index of grid point (i,j,k) */
        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

        for (l = 0; l < nc; l++) { /* one matrix row per component l at this grid point */
          cnt = 0;
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if (ii || jj || kk) {
                  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                    /* neighboring point: couple only to the components listed in row l of ofill */
                    for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  }
                } else {
                  if (dfill) {
                    /* same point: couple only to the components listed in row l of dfill */
                    for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  } else {
                    /* no dfill given: couple to all nc components at this point */
                    for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  }
                }
              }
            }
          }
          row    = l + nc * (slot);
          maxcnt = PetscMax(maxcnt, cnt); /* widest row seen; sizes the zero buffer in the insertion pass below */
          if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
          else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
        }
      }
    }
  }
  /* preallocate J whether it is a Seq or MPI AIJ matrix */
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on-process ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* all-zero values; the insertions below only establish the nonzero locations */
    PetscCall(PetscCalloc1(maxcnt, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          /* identical column enumeration to the preallocation pass above */
          for (l = 0; l < nc; l++) {
            cnt = 0;
            for (ii = istart; ii < iend + 1; ii++) {
              for (jj = jstart; jj < jend + 1; jj++) {
                for (kk = kstart; kk < kend + 1; kk++) {
                  if (ii || jj || kk) {
                    if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                      for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    }
                  } else {
                    if (dfill) {
                      for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    } else {
                      for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    }
                  }
                }
              }
            }
            row = l + nc * (slot);
            PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, values, INSERT_VALUES));
          }
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(0);
}
2286