xref: /petsc/src/dm/impls/da/fdda.c (revision 750b007cd8d816cecd9de99077bb0a703b4cf61a)
1 
2 #include <petsc/private/dmdaimpl.h> /*I      "petscdmda.h"     I*/
3 #include <petscmat.h>
4 #include <petscbt.h>
5 
6 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM, ISColoringType, ISColoring *);
7 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM, ISColoringType, ISColoring *);
8 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM, ISColoringType, ISColoring *);
9 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM, ISColoringType, ISColoring *);
10 
11 /*
12    For ghost i that may be negative or greater than the upper bound this
13   maps it into the 0:m-1 range using periodicity
14 */
15 #define SetInRange(i, m) ((i < 0) ? m + i : ((i >= m) ? i - m : i))
16 
17 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill, PetscInt w, PetscInt **rfill) {
18   PetscInt i, j, nz, *fill;
19 
20   PetscFunctionBegin;
21   if (!dfill) PetscFunctionReturn(0);
22 
23   /* count number nonzeros */
24   nz = 0;
25   for (i = 0; i < w; i++) {
26     for (j = 0; j < w; j++) {
27       if (dfill[w * i + j]) nz++;
28     }
29   }
30   PetscCall(PetscMalloc1(nz + w + 1, &fill));
31   /* construct modified CSR storage of nonzero structure */
32   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
33    so fill[1] - fill[0] gives number of nonzeros in first row etc */
34   nz = w + 1;
35   for (i = 0; i < w; i++) {
36     fill[i] = nz;
37     for (j = 0; j < w; j++) {
38       if (dfill[w * i + j]) {
39         fill[nz] = j;
40         nz++;
41       }
42     }
43   }
44   fill[w] = nz;
45 
46   *rfill = fill;
47   PetscFunctionReturn(0);
48 }
49 
50 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse, PetscInt w, PetscInt **rfill) {
51   PetscInt nz;
52 
53   PetscFunctionBegin;
54   if (!dfillsparse) PetscFunctionReturn(0);
55 
56   /* Determine number of non-zeros */
57   nz = (dfillsparse[w] - w - 1);
58 
59   /* Allocate space for our copy of the given sparse matrix representation. */
60   PetscCall(PetscMalloc1(nz + w + 1, rfill));
61   PetscCall(PetscArraycpy(*rfill, dfillsparse, nz + w + 1));
62   PetscFunctionReturn(0);
63 }
64 
65 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd) {
66   PetscInt i, k, cnt = 1;
67 
68   PetscFunctionBegin;
69 
70   /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of
71    columns to the left with any nonzeros in them plus 1 */
72   PetscCall(PetscCalloc1(dd->w, &dd->ofillcols));
73   for (i = 0; i < dd->w; i++) {
74     for (k = dd->ofill[i]; k < dd->ofill[i + 1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
75   }
76   for (i = 0; i < dd->w; i++) {
77     if (dd->ofillcols[i]) dd->ofillcols[i] = cnt++;
78   }
79   PetscFunctionReturn(0);
80 }
81 
82 /*@
83     DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
84     of the matrix returned by DMCreateMatrix().
85 
86     Logically Collective on da
87 
88     Input Parameters:
89 +   da - the distributed array
90 .   dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
91 -   ofill - the fill pattern in the off-diagonal blocks
92 
93     Level: developer
94 
95     Notes:
96     This only makes sense when you are doing multicomponent problems but using the
97        MPIAIJ matrix format
98 
99            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
100        representing coupling and 0 entries for missing coupling. For example
101 $             dfill[9] = {1, 0, 0,
102 $                         1, 1, 0,
103 $                         0, 1, 1}
104        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
105        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
106        diagonal block).
107 
108      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
109      can be represented in the dfill, ofill format
110 
111    Contributed by Glenn Hammond
112 
113 .seealso `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
114 
115 @*/
116 PetscErrorCode DMDASetBlockFills(DM da, const PetscInt *dfill, const PetscInt *ofill) {
117   DM_DA *dd = (DM_DA *)da->data;
118 
119   PetscFunctionBegin;
120   /* save the given dfill and ofill information */
121   PetscCall(DMDASetBlockFills_Private(dfill, dd->w, &dd->dfill));
122   PetscCall(DMDASetBlockFills_Private(ofill, dd->w, &dd->ofill));
123 
124   /* count nonzeros in ofill columns */
125   PetscCall(DMDASetBlockFills_Private2(dd));
126   PetscFunctionReturn(0);
127 }
128 
129 /*@
130     DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
131     of the matrix returned by DMCreateMatrix(), using sparse representations
132     of fill patterns.
133 
134     Logically Collective on da
135 
136     Input Parameters:
137 +   da - the distributed array
138 .   dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block)
139 -   ofill - the sparse fill pattern in the off-diagonal blocks
140 
141     Level: developer
142 
143     Notes: This only makes sense when you are doing multicomponent problems but using the
144        MPIAIJ matrix format
145 
146            The format for dfill and ofill is a sparse representation of a
147            dof-by-dof matrix with 1 entries representing coupling and 0 entries
148            for missing coupling.  The sparse representation is a 1 dimensional
149            array of length nz + dof + 1, where nz is the number of non-zeros in
150            the matrix.  The first dof entries in the array give the
151            starting array indices of each row's items in the rest of the array,
152            the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
153            and the remaining nz items give the column indices of each of
154            the 1s within the logical 2D matrix.  Each row's items within
155            the array are the column indices of the 1s within that row
156            of the 2D matrix.  PETSc developers may recognize that this is the
157            same format as that computed by the DMDASetBlockFills_Private()
158            function from a dense 2D matrix representation.
159 
160      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
161      can be represented in the dfill, ofill format
162 
163    Contributed by Philip C. Roth
164 
165 .seealso `DMDASetBlockFills()`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
166 
167 @*/
168 PetscErrorCode DMDASetBlockFillsSparse(DM da, const PetscInt *dfillsparse, const PetscInt *ofillsparse) {
169   DM_DA *dd = (DM_DA *)da->data;
170 
171   PetscFunctionBegin;
172   /* save the given dfill and ofill information */
173   PetscCall(DMDASetBlockFillsSparse_Private(dfillsparse, dd->w, &dd->dfill));
174   PetscCall(DMDASetBlockFillsSparse_Private(ofillsparse, dd->w, &dd->ofill));
175 
176   /* count nonzeros in ofill columns */
177   PetscCall(DMDASetBlockFills_Private2(dd));
178   PetscFunctionReturn(0);
179 }
180 
181 PetscErrorCode DMCreateColoring_DA(DM da, ISColoringType ctype, ISColoring *coloring) {
182   PetscInt       dim, m, n, p, nc;
183   DMBoundaryType bx, by, bz;
184   MPI_Comm       comm;
185   PetscMPIInt    size;
186   PetscBool      isBAIJ;
187   DM_DA         *dd = (DM_DA *)da->data;
188 
189   PetscFunctionBegin;
190   /*
191                                   m
192           ------------------------------------------------------
193          |                                                     |
194          |                                                     |
195          |               ----------------------                |
196          |               |                    |                |
197       n  |           yn  |                    |                |
198          |               |                    |                |
199          |               .---------------------                |
200          |             (xs,ys)     xn                          |
201          |            .                                        |
202          |         (gxs,gys)                                   |
203          |                                                     |
204           -----------------------------------------------------
205   */
206 
207   /*
208          nc - number of components per grid point
209          col - number of colors needed in one direction for single component problem
210 
211   */
212   PetscCall(DMDAGetInfo(da, &dim, NULL, NULL, NULL, &m, &n, &p, &nc, NULL, &bx, &by, &bz, NULL));
213 
214   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
215   PetscCallMPI(MPI_Comm_size(comm, &size));
216   if (ctype == IS_COLORING_LOCAL) {
217     if (size == 1) {
218       ctype = IS_COLORING_GLOBAL;
219     } else if (dim > 1) {
220       if ((m == 1 && bx == DM_BOUNDARY_PERIODIC) || (n == 1 && by == DM_BOUNDARY_PERIODIC) || (p == 1 && bz == DM_BOUNDARY_PERIODIC)) {
221         SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain  on the same process");
222       }
223     }
224   }
225 
226   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
227      matrices is for the blocks, not the individual matrix elements  */
228   PetscCall(PetscStrbeginswith(da->mattype, MATBAIJ, &isBAIJ));
229   if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATMPIBAIJ, &isBAIJ));
230   if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATSEQBAIJ, &isBAIJ));
231   if (isBAIJ) {
232     dd->w  = 1;
233     dd->xs = dd->xs / nc;
234     dd->xe = dd->xe / nc;
235     dd->Xs = dd->Xs / nc;
236     dd->Xe = dd->Xe / nc;
237   }
238 
239   /*
240      We do not provide a getcoloring function in the DMDA operations because
241    the basic DMDA does not know about matrices. We think of DMDA as being
242    more low-level then matrices.
243   */
244   if (dim == 1) PetscCall(DMCreateColoring_DA_1d_MPIAIJ(da, ctype, coloring));
245   else if (dim == 2) PetscCall(DMCreateColoring_DA_2d_MPIAIJ(da, ctype, coloring));
246   else if (dim == 3) PetscCall(DMCreateColoring_DA_3d_MPIAIJ(da, ctype, coloring));
247   else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not done for %" PetscInt_FMT " dimension, send us mail petsc-maint@mcs.anl.gov for code", dim);
248   if (isBAIJ) {
249     dd->w  = nc;
250     dd->xs = dd->xs * nc;
251     dd->xe = dd->xe * nc;
252     dd->Xs = dd->Xs * nc;
253     dd->Xe = dd->Xe * nc;
254   }
255   PetscFunctionReturn(0);
256 }
257 
258 /* ---------------------------------------------------------------------------------*/
259 
260 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring) {
261   PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, M, N, dim, s, k, nc, col;
262   PetscInt         ncolors;
263   MPI_Comm         comm;
264   DMBoundaryType   bx, by;
265   DMDAStencilType  st;
266   ISColoringValue *colors;
267   DM_DA           *dd = (DM_DA *)da->data;
268 
269   PetscFunctionBegin;
270   /*
271          nc - number of components per grid point
272          col - number of colors needed in one direction for single component problem
273 
274   */
275   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
276   col = 2 * s + 1;
277   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
278   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
279   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
280 
281   /* special case as taught to us by Paul Hovland */
282   if (st == DMDA_STENCIL_STAR && s == 1) {
283     PetscCall(DMCreateColoring_DA_2d_5pt_MPIAIJ(da, ctype, coloring));
284   } else {
285     if (ctype == IS_COLORING_GLOBAL) {
286       if (!dd->localcoloring) {
287         PetscCall(PetscMalloc1(nc * nx * ny, &colors));
288         ii = 0;
289         for (j = ys; j < ys + ny; j++) {
290           for (i = xs; i < xs + nx; i++) {
291             for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((i % col) + col * (j % col));
292           }
293         }
294         ncolors = nc + nc * (col - 1 + col * (col - 1));
295         PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
296       }
297       *coloring = dd->localcoloring;
298     } else if (ctype == IS_COLORING_LOCAL) {
299       if (!dd->ghostedcoloring) {
300         PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
301         ii = 0;
302         for (j = gys; j < gys + gny; j++) {
303           for (i = gxs; i < gxs + gnx; i++) {
304             for (k = 0; k < nc; k++) {
305               /* the complicated stuff is to handle periodic boundaries */
306               colors[ii++] = k + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col));
307             }
308           }
309         }
310         ncolors = nc + nc * (col - 1 + col * (col - 1));
311         PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
312         /* PetscIntView(ncolors,(PetscInt*)colors,0); */
313 
314         PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
315       }
316       *coloring = dd->ghostedcoloring;
317     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
318   }
319   PetscCall(ISColoringReference(*coloring));
320   PetscFunctionReturn(0);
321 }
322 
323 /* ---------------------------------------------------------------------------------*/
324 
325 PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring) {
326   PetscInt         xs, ys, nx, ny, i, j, gxs, gys, gnx, gny, m, n, p, dim, s, k, nc, col, zs, gzs, ii, l, nz, gnz, M, N, P;
327   PetscInt         ncolors;
328   MPI_Comm         comm;
329   DMBoundaryType   bx, by, bz;
330   DMDAStencilType  st;
331   ISColoringValue *colors;
332   DM_DA           *dd = (DM_DA *)da->data;
333 
334   PetscFunctionBegin;
335   /*
336          nc - number of components per grid point
337          col - number of colors needed in one direction for single component problem
338 
339   */
340   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
341   col = 2 * s + 1;
342   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
343   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
344   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
345 
346   /* create the coloring */
347   if (ctype == IS_COLORING_GLOBAL) {
348     if (!dd->localcoloring) {
349       PetscCall(PetscMalloc1(nc * nx * ny * nz, &colors));
350       ii = 0;
351       for (k = zs; k < zs + nz; k++) {
352         for (j = ys; j < ys + ny; j++) {
353           for (i = xs; i < xs + nx; i++) {
354             for (l = 0; l < nc; l++) colors[ii++] = l + nc * ((i % col) + col * (j % col) + col * col * (k % col));
355           }
356         }
357       }
358       ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
359       PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny * nz, colors, PETSC_OWN_POINTER, &dd->localcoloring));
360     }
361     *coloring = dd->localcoloring;
362   } else if (ctype == IS_COLORING_LOCAL) {
363     if (!dd->ghostedcoloring) {
364       PetscCall(PetscMalloc1(nc * gnx * gny * gnz, &colors));
365       ii = 0;
366       for (k = gzs; k < gzs + gnz; k++) {
367         for (j = gys; j < gys + gny; j++) {
368           for (i = gxs; i < gxs + gnx; i++) {
369             for (l = 0; l < nc; l++) {
370               /* the complicated stuff is to handle periodic boundaries */
371               colors[ii++] = l + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col) + col * col * (SetInRange(k, p) % col));
372             }
373           }
374         }
375       }
376       ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
377       PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny * gnz, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
378       PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
379     }
380     *coloring = dd->ghostedcoloring;
381   } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
382   PetscCall(ISColoringReference(*coloring));
383   PetscFunctionReturn(0);
384 }
385 
386 /* ---------------------------------------------------------------------------------*/
387 
388 PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring) {
389   PetscInt         xs, nx, i, i1, gxs, gnx, l, m, M, dim, s, nc, col;
390   PetscInt         ncolors;
391   MPI_Comm         comm;
392   DMBoundaryType   bx;
393   ISColoringValue *colors;
394   DM_DA           *dd = (DM_DA *)da->data;
395 
396   PetscFunctionBegin;
397   /*
398          nc - number of components per grid point
399          col - number of colors needed in one direction for single component problem
400 
401   */
402   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, &M, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
403   col = 2 * s + 1;
404   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
405   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
406   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
407 
408   /* create the coloring */
409   if (ctype == IS_COLORING_GLOBAL) {
410     if (!dd->localcoloring) {
411       PetscCall(PetscMalloc1(nc * nx, &colors));
412       if (dd->ofillcols) {
413         PetscInt tc = 0;
414         for (i = 0; i < nc; i++) tc += (PetscInt)(dd->ofillcols[i] > 0);
415         i1 = 0;
416         for (i = xs; i < xs + nx; i++) {
417           for (l = 0; l < nc; l++) {
418             if (dd->ofillcols[l] && (i % col)) {
419               colors[i1++] = nc - 1 + tc * ((i % col) - 1) + dd->ofillcols[l];
420             } else {
421               colors[i1++] = l;
422             }
423           }
424         }
425         ncolors = nc + 2 * s * tc;
426       } else {
427         i1 = 0;
428         for (i = xs; i < xs + nx; i++) {
429           for (l = 0; l < nc; l++) colors[i1++] = l + nc * (i % col);
430         }
431         ncolors = nc + nc * (col - 1);
432       }
433       PetscCall(ISColoringCreate(comm, ncolors, nc * nx, colors, PETSC_OWN_POINTER, &dd->localcoloring));
434     }
435     *coloring = dd->localcoloring;
436   } else if (ctype == IS_COLORING_LOCAL) {
437     if (!dd->ghostedcoloring) {
438       PetscCall(PetscMalloc1(nc * gnx, &colors));
439       i1 = 0;
440       for (i = gxs; i < gxs + gnx; i++) {
441         for (l = 0; l < nc; l++) {
442           /* the complicated stuff is to handle periodic boundaries */
443           colors[i1++] = l + nc * (SetInRange(i, m) % col);
444         }
445       }
446       ncolors = nc + nc * (col - 1);
447       PetscCall(ISColoringCreate(comm, ncolors, nc * gnx, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
448       PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
449     }
450     *coloring = dd->ghostedcoloring;
451   } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
452   PetscCall(ISColoringReference(*coloring));
453   PetscFunctionReturn(0);
454 }
455 
456 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring) {
457   PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, dim, s, k, nc;
458   PetscInt         ncolors;
459   MPI_Comm         comm;
460   DMBoundaryType   bx, by;
461   ISColoringValue *colors;
462   DM_DA           *dd = (DM_DA *)da->data;
463 
464   PetscFunctionBegin;
465   /*
466          nc - number of components per grid point
467          col - number of colors needed in one direction for single component problem
468 
469   */
470   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, NULL));
471   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
472   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
473   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
474   /* create the coloring */
475   if (ctype == IS_COLORING_GLOBAL) {
476     if (!dd->localcoloring) {
477       PetscCall(PetscMalloc1(nc * nx * ny, &colors));
478       ii = 0;
479       for (j = ys; j < ys + ny; j++) {
480         for (i = xs; i < xs + nx; i++) {
481           for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * j + i) % 5);
482         }
483       }
484       ncolors = 5 * nc;
485       PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
486     }
487     *coloring = dd->localcoloring;
488   } else if (ctype == IS_COLORING_LOCAL) {
489     if (!dd->ghostedcoloring) {
490       PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
491       ii = 0;
492       for (j = gys; j < gys + gny; j++) {
493         for (i = gxs; i < gxs + gnx; i++) {
494           for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * SetInRange(j, n) + SetInRange(i, m)) % 5);
495         }
496       }
497       ncolors = 5 * nc;
498       PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
499       PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
500     }
501     *coloring = dd->ghostedcoloring;
502   } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
503   PetscFunctionReturn(0);
504 }
505 
506 /* =========================================================================== */
507 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM, Mat);
508 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM, Mat);
509 extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM, Mat);
510 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM, Mat);
511 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM, Mat);
512 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM, Mat);
513 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM, Mat);
514 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM, Mat);
515 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM, Mat);
516 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM, Mat);
517 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM, Mat);
518 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM, Mat);
519 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM, Mat);
520 extern PetscErrorCode DMCreateMatrix_DA_IS(DM, Mat);
521 
522 /*@C
523    MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix
524 
525    Logically Collective on mat
526 
527    Input Parameters:
528 +  mat - the matrix
529 -  da - the da
530 
531    Level: intermediate
532 
533 @*/
534 PetscErrorCode MatSetupDM(Mat mat, DM da) {
535   PetscFunctionBegin;
536   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
537   PetscValidHeaderSpecificType(da, DM_CLASSID, 2, DMDA);
538   PetscTryMethod(mat, "MatSetupDM_C", (Mat, DM), (mat, da));
539   PetscFunctionReturn(0);
540 }
541 
542 PetscErrorCode MatView_MPI_DA(Mat A, PetscViewer viewer) {
543   DM                da;
544   const char       *prefix;
545   Mat               Anatural;
546   AO                ao;
547   PetscInt          rstart, rend, *petsc, i;
548   IS                is;
549   MPI_Comm          comm;
550   PetscViewerFormat format;
551 
552   PetscFunctionBegin;
553   /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
554   PetscCall(PetscViewerGetFormat(viewer, &format));
555   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0);
556 
557   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
558   PetscCall(MatGetDM(A, &da));
559   PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");
560 
561   PetscCall(DMDAGetAO(da, &ao));
562   PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
563   PetscCall(PetscMalloc1(rend - rstart, &petsc));
564   for (i = rstart; i < rend; i++) petsc[i - rstart] = i;
565   PetscCall(AOApplicationToPetsc(ao, rend - rstart, petsc));
566   PetscCall(ISCreateGeneral(comm, rend - rstart, petsc, PETSC_OWN_POINTER, &is));
567 
568   /* call viewer on natural ordering */
569   PetscCall(MatCreateSubMatrix(A, is, is, MAT_INITIAL_MATRIX, &Anatural));
570   PetscCall(ISDestroy(&is));
571   PetscCall(PetscObjectGetOptionsPrefix((PetscObject)A, &prefix));
572   PetscCall(PetscObjectSetOptionsPrefix((PetscObject)Anatural, prefix));
573   PetscCall(PetscObjectSetName((PetscObject)Anatural, ((PetscObject)A)->name));
574   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
575   PetscCall(MatView(Anatural, viewer));
576   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
577   PetscCall(MatDestroy(&Anatural));
578   PetscFunctionReturn(0);
579 }
580 
581 PetscErrorCode MatLoad_MPI_DA(Mat A, PetscViewer viewer) {
582   DM       da;
583   Mat      Anatural, Aapp;
584   AO       ao;
585   PetscInt rstart, rend, *app, i, m, n, M, N;
586   IS       is;
587   MPI_Comm comm;
588 
589   PetscFunctionBegin;
590   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
591   PetscCall(MatGetDM(A, &da));
592   PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");
593 
594   /* Load the matrix in natural ordering */
595   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Anatural));
596   PetscCall(MatSetType(Anatural, ((PetscObject)A)->type_name));
597   PetscCall(MatGetSize(A, &M, &N));
598   PetscCall(MatGetLocalSize(A, &m, &n));
599   PetscCall(MatSetSizes(Anatural, m, n, M, N));
600   PetscCall(MatLoad(Anatural, viewer));
601 
602   /* Map natural ordering to application ordering and create IS */
603   PetscCall(DMDAGetAO(da, &ao));
604   PetscCall(MatGetOwnershipRange(Anatural, &rstart, &rend));
605   PetscCall(PetscMalloc1(rend - rstart, &app));
606   for (i = rstart; i < rend; i++) app[i - rstart] = i;
607   PetscCall(AOPetscToApplication(ao, rend - rstart, app));
608   PetscCall(ISCreateGeneral(comm, rend - rstart, app, PETSC_OWN_POINTER, &is));
609 
610   /* Do permutation and replace header */
611   PetscCall(MatCreateSubMatrix(Anatural, is, is, MAT_INITIAL_MATRIX, &Aapp));
612   PetscCall(MatHeaderReplace(A, &Aapp));
613   PetscCall(ISDestroy(&is));
614   PetscCall(MatDestroy(&Anatural));
615   PetscFunctionReturn(0);
616 }
617 
618 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J) {
619   PetscInt dim, dof, nx, ny, nz, dims[3], starts[3], M, N, P;
620   Mat      A;
621   MPI_Comm comm;
622   MatType  Atype;
623   void (*aij)(void) = NULL, (*baij)(void) = NULL, (*sbaij)(void) = NULL, (*sell)(void) = NULL, (*is)(void) = NULL;
624   MatType     mtype;
625   PetscMPIInt size;
626   DM_DA      *dd = (DM_DA *)da->data;
627 
628   PetscFunctionBegin;
629   PetscCall(MatInitializePackage());
630   mtype = da->mattype;
631 
632   /*
633                                   m
634           ------------------------------------------------------
635          |                                                     |
636          |                                                     |
637          |               ----------------------                |
638          |               |                    |                |
639       n  |           ny  |                    |                |
640          |               |                    |                |
641          |               .---------------------                |
642          |             (xs,ys)     nx                          |
643          |            .                                        |
644          |         (gxs,gys)                                   |
645          |                                                     |
646           -----------------------------------------------------
647   */
648 
649   /*
650          nc - number of components per grid point
651          col - number of colors needed in one direction for single component problem
652 
653   */
654   M   = dd->M;
655   N   = dd->N;
656   P   = dd->P;
657   dim = da->dim;
658   dof = dd->w;
659   /* PetscCall(DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); */
660   PetscCall(DMDAGetCorners(da, NULL, NULL, NULL, &nx, &ny, &nz));
661   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
662   PetscCall(MatCreate(comm, &A));
663   PetscCall(MatSetSizes(A, dof * nx * ny * nz, dof * nx * ny * nz, dof * M * N * P, dof * M * N * P));
664   PetscCall(MatSetType(A, mtype));
665   PetscCall(MatSetFromOptions(A));
666   if (dof * nx * ny * nz < da->bind_below) {
667     PetscCall(MatSetBindingPropagates(A, PETSC_TRUE));
668     PetscCall(MatBindToCPU(A, PETSC_TRUE));
669   }
670   PetscCall(MatSetDM(A, da));
671   if (da->structure_only) PetscCall(MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE));
672   PetscCall(MatGetType(A, &Atype));
673   /*
674      We do not provide a getmatrix function in the DMDA operations because
675    the basic DMDA does not know about matrices. We think of DMDA as being more
676    more low-level than matrices. This is kind of cheating but, cause sometimes
677    we think of DMDA has higher level than matrices.
678 
679      We could switch based on Atype (or mtype), but we do not since the
680    specialized setting routines depend only on the particular preallocation
681    details of the matrix, not the type itself.
682   */
683   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJSetPreallocation_C", &aij));
684   if (!aij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJSetPreallocation_C", &aij));
685   if (!aij) {
686     PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIBAIJSetPreallocation_C", &baij));
687     if (!baij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", &baij));
688     if (!baij) {
689       PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISBAIJSetPreallocation_C", &sbaij));
690       if (!sbaij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSBAIJSetPreallocation_C", &sbaij));
691       if (!sbaij) {
692         PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", &sell));
693         if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSELLSetPreallocation_C", &sell));
694       }
695       if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatISSetPreallocation_C", &is));
696     }
697   }
698   if (aij) {
699     if (dim == 1) {
700       if (dd->ofill) {
701         PetscCall(DMCreateMatrix_DA_1d_MPIAIJ_Fill(da, A));
702       } else {
703         DMBoundaryType bx;
704         PetscMPIInt    size;
705         PetscCall(DMDAGetInfo(da, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &bx, NULL, NULL, NULL));
706         PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));
707         if (size == 1 && bx == DM_BOUNDARY_NONE) {
708           PetscCall(DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da, A));
709         } else {
710           PetscCall(DMCreateMatrix_DA_1d_MPIAIJ(da, A));
711         }
712       }
713     } else if (dim == 2) {
714       if (dd->ofill) {
715         PetscCall(DMCreateMatrix_DA_2d_MPIAIJ_Fill(da, A));
716       } else {
717         PetscCall(DMCreateMatrix_DA_2d_MPIAIJ(da, A));
718       }
719     } else if (dim == 3) {
720       if (dd->ofill) {
721         PetscCall(DMCreateMatrix_DA_3d_MPIAIJ_Fill(da, A));
722       } else {
723         PetscCall(DMCreateMatrix_DA_3d_MPIAIJ(da, A));
724       }
725     }
726   } else if (baij) {
727     if (dim == 2) {
728       PetscCall(DMCreateMatrix_DA_2d_MPIBAIJ(da, A));
729     } else if (dim == 3) {
730       PetscCall(DMCreateMatrix_DA_3d_MPIBAIJ(da, A));
731     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
732   } else if (sbaij) {
733     if (dim == 2) {
734       PetscCall(DMCreateMatrix_DA_2d_MPISBAIJ(da, A));
735     } else if (dim == 3) {
736       PetscCall(DMCreateMatrix_DA_3d_MPISBAIJ(da, A));
737     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
738   } else if (sell) {
739     if (dim == 2) {
740       PetscCall(DMCreateMatrix_DA_2d_MPISELL(da, A));
741     } else if (dim == 3) {
742       PetscCall(DMCreateMatrix_DA_3d_MPISELL(da, A));
743     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
744   } else if (is) {
745     PetscCall(DMCreateMatrix_DA_IS(da, A));
746   } else {
747     ISLocalToGlobalMapping ltog;
748 
749     PetscCall(MatSetBlockSize(A, dof));
750     PetscCall(MatSetUp(A));
751     PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
752     PetscCall(MatSetLocalToGlobalMapping(A, ltog, ltog));
753   }
754   PetscCall(DMDAGetGhostCorners(da, &starts[0], &starts[1], &starts[2], &dims[0], &dims[1], &dims[2]));
755   PetscCall(MatSetStencil(A, dim, dims, starts, dof));
756   PetscCall(MatSetDM(A, da));
757   PetscCallMPI(MPI_Comm_size(comm, &size));
758   if (size > 1) {
759     /* change viewer to display matrix in natural ordering */
760     PetscCall(MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA));
761     PetscCall(MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA));
762   }
763   *J = A;
764   PetscFunctionReturn(0);
765 }
766 
767 /* ---------------------------------------------------------------------------------*/
768 PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]);
769 
770 PetscErrorCode DMCreateMatrix_DA_IS(DM dm, Mat J) {
771   DM_DA                 *da = (DM_DA *)dm->data;
772   Mat                    lJ, P;
773   ISLocalToGlobalMapping ltog;
774   IS                     is;
775   PetscBT                bt;
776   const PetscInt        *e_loc, *idx;
777   PetscInt               i, nel, nen, nv, dof, *gidx, n, N;
778 
779   /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
780      We need to filter out the local indices that are not represented through the DMDAGetElements decomposition */
781   PetscFunctionBegin;
782   dof = da->w;
783   PetscCall(MatSetBlockSize(J, dof));
784   PetscCall(DMGetLocalToGlobalMapping(dm, &ltog));
785 
786   /* flag local elements indices in local DMDA numbering */
787   PetscCall(ISLocalToGlobalMappingGetSize(ltog, &nv));
788   PetscCall(PetscBTCreate(nv / dof, &bt));
789   PetscCall(DMDAGetElements(dm, &nel, &nen, &e_loc)); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
790   for (i = 0; i < nel * nen; i++) PetscCall(PetscBTSet(bt, e_loc[i]));
791 
792   /* filter out (set to -1) the global indices not used by the local elements */
793   PetscCall(PetscMalloc1(nv / dof, &gidx));
794   PetscCall(ISLocalToGlobalMappingGetBlockIndices(ltog, &idx));
795   PetscCall(PetscArraycpy(gidx, idx, nv / dof));
796   PetscCall(ISLocalToGlobalMappingRestoreBlockIndices(ltog, &idx));
797   for (i = 0; i < nv / dof; i++)
798     if (!PetscBTLookup(bt, i)) gidx[i] = -1;
799   PetscCall(PetscBTDestroy(&bt));
800   PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)dm), dof, nv / dof, gidx, PETSC_OWN_POINTER, &is));
801   PetscCall(ISLocalToGlobalMappingCreateIS(is, &ltog));
802   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
803   PetscCall(ISLocalToGlobalMappingDestroy(&ltog));
804   PetscCall(ISDestroy(&is));
805 
806   /* Preallocation */
807   if (dm->prealloc_skip) {
808     PetscCall(MatSetUp(J));
809   } else {
810     PetscCall(MatISGetLocalMat(J, &lJ));
811     PetscCall(MatGetLocalToGlobalMapping(lJ, &ltog, NULL));
812     PetscCall(MatCreate(PetscObjectComm((PetscObject)lJ), &P));
813     PetscCall(MatSetType(P, MATPREALLOCATOR));
814     PetscCall(MatSetLocalToGlobalMapping(P, ltog, ltog));
815     PetscCall(MatGetSize(lJ, &N, NULL));
816     PetscCall(MatGetLocalSize(lJ, &n, NULL));
817     PetscCall(MatSetSizes(P, n, n, N, N));
818     PetscCall(MatSetBlockSize(P, dof));
819     PetscCall(MatSetUp(P));
820     for (i = 0; i < nel; i++) PetscCall(MatSetValuesBlockedLocal(P, nen, e_loc + i * nen, nen, e_loc + i * nen, NULL, INSERT_VALUES));
821     PetscCall(MatPreallocatorPreallocate(P, (PetscBool)!da->prealloc_only, lJ));
822     PetscCall(MatISRestoreLocalMat(J, &lJ));
823     PetscCall(DMDARestoreElements(dm, &nel, &nen, &e_loc));
824     PetscCall(MatDestroy(&P));
825 
826     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
827     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
828   }
829   PetscFunctionReturn(0);
830 }
831 
832 PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da, Mat J) {
833   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p;
834   PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
835   MPI_Comm               comm;
836   PetscScalar           *values;
837   DMBoundaryType         bx, by;
838   ISLocalToGlobalMapping ltog;
839   DMDAStencilType        st;
840 
841   PetscFunctionBegin;
842   /*
843          nc - number of components per grid point
844          col - number of colors needed in one direction for single component problem
845 
846   */
847   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
848   col = 2 * s + 1;
849   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
850   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
851   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
852 
853   PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
854   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
855 
856   PetscCall(MatSetBlockSize(J, nc));
857   /* determine the matrix preallocation information */
858   MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
859   for (i = xs; i < xs + nx; i++) {
860     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
861     pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
862 
863     for (j = ys; j < ys + ny; j++) {
864       slot = i - gxs + gnx * (j - gys);
865 
866       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
867       lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
868 
869       cnt = 0;
870       for (k = 0; k < nc; k++) {
871         for (l = lstart; l < lend + 1; l++) {
872           for (p = pstart; p < pend + 1; p++) {
873             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
874               cols[cnt++] = k + nc * (slot + gnx * l + p);
875             }
876           }
877         }
878         rows[k] = k + nc * (slot);
879       }
880       PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
881     }
882   }
883   PetscCall(MatSetBlockSize(J, nc));
884   PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
885   PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
886   MatPreallocateEnd(dnz, onz);
887 
888   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
889 
890   /*
891     For each node in the grid: we get the neighbors in the local (on processor ordering
892     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
893     PETSc ordering.
894   */
895   if (!da->prealloc_only) {
896     PetscCall(PetscCalloc1(col * col * nc * nc, &values));
897     for (i = xs; i < xs + nx; i++) {
898       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
899       pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
900 
901       for (j = ys; j < ys + ny; j++) {
902         slot = i - gxs + gnx * (j - gys);
903 
904         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
905         lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
906 
907         cnt = 0;
908         for (k = 0; k < nc; k++) {
909           for (l = lstart; l < lend + 1; l++) {
910             for (p = pstart; p < pend + 1; p++) {
911               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
912                 cols[cnt++] = k + nc * (slot + gnx * l + p);
913               }
914             }
915           }
916           rows[k] = k + nc * (slot);
917         }
918         PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
919       }
920     }
921     PetscCall(PetscFree(values));
922     /* do not copy values to GPU since they are all zero and not yet needed there */
923     PetscCall(MatBindToCPU(J, PETSC_TRUE));
924     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
925     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
926     PetscCall(MatBindToCPU(J, PETSC_FALSE));
927     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
928   }
929   PetscCall(PetscFree2(rows, cols));
930   PetscFunctionReturn(0);
931 }
932 
933 PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da, Mat J) {
934   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
935   PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
936   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
937   MPI_Comm               comm;
938   PetscScalar           *values;
939   DMBoundaryType         bx, by, bz;
940   ISLocalToGlobalMapping ltog;
941   DMDAStencilType        st;
942 
943   PetscFunctionBegin;
944   /*
945          nc - number of components per grid point
946          col - number of colors needed in one direction for single component problem
947 
948   */
949   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
950   col = 2 * s + 1;
951   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
952   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
953   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
954 
955   PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
956   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
957 
958   PetscCall(MatSetBlockSize(J, nc));
959   /* determine the matrix preallocation information */
960   MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
961   for (i = xs; i < xs + nx; i++) {
962     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
963     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
964     for (j = ys; j < ys + ny; j++) {
965       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
966       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
967       for (k = zs; k < zs + nz; k++) {
968         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
969         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
970 
971         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
972 
973         cnt = 0;
974         for (l = 0; l < nc; l++) {
975           for (ii = istart; ii < iend + 1; ii++) {
976             for (jj = jstart; jj < jend + 1; jj++) {
977               for (kk = kstart; kk < kend + 1; kk++) {
978                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
979                   cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
980                 }
981               }
982             }
983           }
984           rows[l] = l + nc * (slot);
985         }
986         PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
987       }
988     }
989   }
990   PetscCall(MatSetBlockSize(J, nc));
991   PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
992   PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
993   MatPreallocateEnd(dnz, onz);
994   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
995 
996   /*
997     For each node in the grid: we get the neighbors in the local (on processor ordering
998     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
999     PETSc ordering.
1000   */
1001   if (!da->prealloc_only) {
1002     PetscCall(PetscCalloc1(col * col * col * nc * nc * nc, &values));
1003     for (i = xs; i < xs + nx; i++) {
1004       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1005       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1006       for (j = ys; j < ys + ny; j++) {
1007         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1008         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1009         for (k = zs; k < zs + nz; k++) {
1010           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1011           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1012 
1013           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1014 
1015           cnt = 0;
1016           for (l = 0; l < nc; l++) {
1017             for (ii = istart; ii < iend + 1; ii++) {
1018               for (jj = jstart; jj < jend + 1; jj++) {
1019                 for (kk = kstart; kk < kend + 1; kk++) {
1020                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1021                     cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
1022                   }
1023                 }
1024               }
1025             }
1026             rows[l] = l + nc * (slot);
1027           }
1028           PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
1029         }
1030       }
1031     }
1032     PetscCall(PetscFree(values));
1033     /* do not copy values to GPU since they are all zero and not yet needed there */
1034     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1035     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1036     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1037     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1038     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1039   }
1040   PetscCall(PetscFree2(rows, cols));
1041   PetscFunctionReturn(0);
1042 }
1043 
1044 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da, Mat J) {
1045   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, M, N;
1046   PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
1047   MPI_Comm               comm;
1048   DMBoundaryType         bx, by;
1049   ISLocalToGlobalMapping ltog, mltog;
1050   DMDAStencilType        st;
1051   PetscBool              removedups = PETSC_FALSE, alreadyboundtocpu = PETSC_TRUE;
1052 
1053   PetscFunctionBegin;
1054   /*
1055          nc - number of components per grid point
1056          col - number of colors needed in one direction for single component problem
1057 
1058   */
1059   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
1060   if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1061   col = 2 * s + 1;
1062   /*
1063        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1064        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1065   */
1066   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1067   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1068   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1069   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1070   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1071 
1072   PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
1073   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1074 
1075   PetscCall(MatSetBlockSize(J, nc));
1076   /* determine the matrix preallocation information */
1077   MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
1078   for (i = xs; i < xs + nx; i++) {
1079     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1080     pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1081 
1082     for (j = ys; j < ys + ny; j++) {
1083       slot = i - gxs + gnx * (j - gys);
1084 
1085       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1086       lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1087 
1088       cnt = 0;
1089       for (k = 0; k < nc; k++) {
1090         for (l = lstart; l < lend + 1; l++) {
1091           for (p = pstart; p < pend + 1; p++) {
1092             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
1093               cols[cnt++] = k + nc * (slot + gnx * l + p);
1094             }
1095           }
1096         }
1097         rows[k] = k + nc * (slot);
1098       }
1099       if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1100       else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1101     }
1102   }
1103   PetscCall(MatSetBlockSize(J, nc));
1104   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
1105   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
1106   MatPreallocateEnd(dnz, onz);
1107   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1108   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1109 
1110   /*
1111     For each node in the grid: we get the neighbors in the local (on processor ordering
1112     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1113     PETSc ordering.
1114   */
1115   if (!da->prealloc_only) {
1116     for (i = xs; i < xs + nx; i++) {
1117       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1118       pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1119 
1120       for (j = ys; j < ys + ny; j++) {
1121         slot = i - gxs + gnx * (j - gys);
1122 
1123         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1124         lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1125 
1126         cnt = 0;
1127         for (l = lstart; l < lend + 1; l++) {
1128           for (p = pstart; p < pend + 1; p++) {
1129             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
1130               cols[cnt++] = nc * (slot + gnx * l + p);
1131               for (k = 1; k < nc; k++) {
1132                 cols[cnt] = 1 + cols[cnt - 1];
1133                 cnt++;
1134               }
1135             }
1136           }
1137         }
1138         for (k = 0; k < nc; k++) rows[k] = k + nc * (slot);
1139         PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1140       }
1141     }
1142     /* do not copy values to GPU since they are all zero and not yet needed there */
1143     PetscCall(MatBoundToCPU(J, &alreadyboundtocpu));
1144     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1145     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1146     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1147     if (!alreadyboundtocpu) PetscCall(MatBindToCPU(J, PETSC_FALSE));
1148     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1149     if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1150   }
1151   PetscCall(PetscFree2(rows, cols));
1152   PetscFunctionReturn(0);
1153 }
1154 
1155 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da, Mat J) {
1156   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1157   PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, M, N;
1158   PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
1159   DM_DA                 *dd = (DM_DA *)da->data;
1160   PetscInt               ifill_col, *ofill = dd->ofill, *dfill = dd->dfill;
1161   MPI_Comm               comm;
1162   DMBoundaryType         bx, by;
1163   ISLocalToGlobalMapping ltog;
1164   DMDAStencilType        st;
1165   PetscBool              removedups = PETSC_FALSE;
1166 
1167   PetscFunctionBegin;
1168   /*
1169          nc - number of components per grid point
1170          col - number of colors needed in one direction for single component problem
1171 
1172   */
1173   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
1174   col = 2 * s + 1;
1175   /*
1176        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1177        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1178   */
1179   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1180   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1181   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1182   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1183   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1184 
1185   PetscCall(PetscMalloc1(col * col * nc, &cols));
1186   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1187 
1188   PetscCall(MatSetBlockSize(J, nc));
1189   /* determine the matrix preallocation information */
1190   MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
1191   for (i = xs; i < xs + nx; i++) {
1192     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1193     pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1194 
1195     for (j = ys; j < ys + ny; j++) {
1196       slot = i - gxs + gnx * (j - gys);
1197 
1198       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1199       lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1200 
1201       for (k = 0; k < nc; k++) {
1202         cnt = 0;
1203         for (l = lstart; l < lend + 1; l++) {
1204           for (p = pstart; p < pend + 1; p++) {
1205             if (l || p) {
1206               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
1207                 for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
1208               }
1209             } else {
1210               if (dfill) {
1211                 for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
1212               } else {
1213                 for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
1214               }
1215             }
1216           }
1217         }
1218         row    = k + nc * (slot);
1219         maxcnt = PetscMax(maxcnt, cnt);
1220         if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
1221         else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
1222       }
1223     }
1224   }
1225   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
1226   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
1227   MatPreallocateEnd(dnz, onz);
1228   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1229 
1230   /*
1231     For each node in the grid: we get the neighbors in the local (on processor ordering
1232     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1233     PETSc ordering.
1234   */
1235   if (!da->prealloc_only) {
1236     for (i = xs; i < xs + nx; i++) {
1237       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1238       pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1239 
1240       for (j = ys; j < ys + ny; j++) {
1241         slot = i - gxs + gnx * (j - gys);
1242 
1243         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1244         lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1245 
1246         for (k = 0; k < nc; k++) {
1247           cnt = 0;
1248           for (l = lstart; l < lend + 1; l++) {
1249             for (p = pstart; p < pend + 1; p++) {
1250               if (l || p) {
1251                 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
1252                   for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
1253                 }
1254               } else {
1255                 if (dfill) {
1256                   for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
1257                 } else {
1258                   for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
1259                 }
1260               }
1261             }
1262           }
1263           row = k + nc * (slot);
1264           PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1265         }
1266       }
1267     }
1268     /* do not copy values to GPU since they are all zero and not yet needed there */
1269     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1270     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1271     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1272     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1273     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1274   }
1275   PetscCall(PetscFree(cols));
1276   PetscFunctionReturn(0);
1277 }
1278 
1279 /* ---------------------------------------------------------------------------------*/
1280 
1281 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da, Mat J) {
1282   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1283   PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
1284   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
1285   MPI_Comm               comm;
1286   DMBoundaryType         bx, by, bz;
1287   ISLocalToGlobalMapping ltog, mltog;
1288   DMDAStencilType        st;
1289   PetscBool              removedups = PETSC_FALSE;
1290 
1291   PetscFunctionBegin;
1292   /*
1293          nc - number of components per grid point
1294          col - number of colors needed in one direction for single component problem
1295 
1296   */
1297   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
1298   if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1299   col = 2 * s + 1;
1300 
1301   /*
1302        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1303        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1304   */
1305   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1306   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1307   if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;
1308 
1309   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
1310   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
1311   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1312 
1313   PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
1314   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1315 
1316   PetscCall(MatSetBlockSize(J, nc));
1317   /* determine the matrix preallocation information */
1318   MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
1319   for (i = xs; i < xs + nx; i++) {
1320     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1321     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1322     for (j = ys; j < ys + ny; j++) {
1323       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1324       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1325       for (k = zs; k < zs + nz; k++) {
1326         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1327         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1328 
1329         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1330 
1331         cnt = 0;
1332         for (l = 0; l < nc; l++) {
1333           for (ii = istart; ii < iend + 1; ii++) {
1334             for (jj = jstart; jj < jend + 1; jj++) {
1335               for (kk = kstart; kk < kend + 1; kk++) {
1336                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1337                   cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
1338                 }
1339               }
1340             }
1341           }
1342           rows[l] = l + nc * (slot);
1343         }
1344         if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1345         else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1346       }
1347     }
1348   }
1349   PetscCall(MatSetBlockSize(J, nc));
1350   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
1351   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
1352   MatPreallocateEnd(dnz, onz);
1353   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1354   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1355 
1356   /*
1357     For each node in the grid: we get the neighbors in the local (on processor ordering
1358     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1359     PETSc ordering.
1360   */
1361   if (!da->prealloc_only) {
1362     for (i = xs; i < xs + nx; i++) {
1363       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1364       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1365       for (j = ys; j < ys + ny; j++) {
1366         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1367         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1368         for (k = zs; k < zs + nz; k++) {
1369           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1370           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1371 
1372           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1373 
1374           cnt = 0;
1375           for (kk = kstart; kk < kend + 1; kk++) {
1376             for (jj = jstart; jj < jend + 1; jj++) {
1377               for (ii = istart; ii < iend + 1; ii++) {
1378                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1379                   cols[cnt++] = nc * (slot + ii + gnx * jj + gnx * gny * kk);
1380                   for (l = 1; l < nc; l++) {
1381                     cols[cnt] = 1 + cols[cnt - 1];
1382                     cnt++;
1383                   }
1384                 }
1385               }
1386             }
1387           }
1388           rows[0] = nc * (slot);
1389           for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1390           PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1391         }
1392       }
1393     }
1394     /* do not copy values to GPU since they are all zero and not yet needed there */
1395     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1396     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1397     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1398     if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1399     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1400     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1401   }
1402   PetscCall(PetscFree2(rows, cols));
1403   PetscFunctionReturn(0);
1404 }
1405 
1406 /* ---------------------------------------------------------------------------------*/
1407 
1408 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da, Mat J) {
1409   DM_DA                 *dd = (DM_DA *)da->data;
1410   PetscInt               xs, nx, i, j, gxs, gnx, row, k, l;
1411   PetscInt               m, dim, s, *cols = NULL, nc, cnt, maxcnt = 0, *ocols;
1412   PetscInt              *ofill = dd->ofill, *dfill = dd->dfill;
1413   DMBoundaryType         bx;
1414   ISLocalToGlobalMapping ltog;
1415   PetscMPIInt            rank, size;
1416 
1417   PetscFunctionBegin;
1418   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)da), &rank));
1419   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));
1420 
1421   /*
1422          nc - number of components per grid point
1423 
1424   */
1425   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1426   PetscCheck(s <= 1, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Matrix creation for 1d not implemented correctly for stencil width larger than 1");
1427   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1428   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1429 
1430   PetscCall(MatSetBlockSize(J, nc));
1431   PetscCall(PetscCalloc2(nx * nc, &cols, nx * nc, &ocols));
1432 
1433   /*
1434         note should be smaller for first and last process with no periodic
1435         does not handle dfill
1436   */
1437   cnt = 0;
1438   /* coupling with process to the left */
1439   for (i = 0; i < s; i++) {
1440     for (j = 0; j < nc; j++) {
1441       ocols[cnt] = ((rank == 0) ? 0 : (s - i) * (ofill[j + 1] - ofill[j]));
1442       cols[cnt]  = dfill[j + 1] - dfill[j] + (s + i) * (ofill[j + 1] - ofill[j]);
1443       if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1444         if (size > 1) ocols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
1445         else cols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
1446       }
1447       maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1448       cnt++;
1449     }
1450   }
1451   for (i = s; i < nx - s; i++) {
1452     for (j = 0; j < nc; j++) {
1453       cols[cnt] = dfill[j + 1] - dfill[j] + 2 * s * (ofill[j + 1] - ofill[j]);
1454       maxcnt    = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1455       cnt++;
1456     }
1457   }
1458   /* coupling with process to the right */
1459   for (i = nx - s; i < nx; i++) {
1460     for (j = 0; j < nc; j++) {
1461       ocols[cnt] = ((rank == (size - 1)) ? 0 : (i - nx + s + 1) * (ofill[j + 1] - ofill[j]));
1462       cols[cnt]  = dfill[j + 1] - dfill[j] + (s + nx - i - 1) * (ofill[j + 1] - ofill[j]);
1463       if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1464         if (size > 1) ocols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
1465         else cols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
1466       }
1467       maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1468       cnt++;
1469     }
1470   }
1471 
1472   PetscCall(MatSeqAIJSetPreallocation(J, 0, cols));
1473   PetscCall(MatMPIAIJSetPreallocation(J, 0, cols, 0, ocols));
1474   PetscCall(PetscFree2(cols, ocols));
1475 
1476   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1477   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1478 
1479   /*
1480     For each node in the grid: we get the neighbors in the local (on processor ordering
1481     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1482     PETSc ordering.
1483   */
1484   if (!da->prealloc_only) {
1485     PetscCall(PetscMalloc1(maxcnt, &cols));
1486     row = xs * nc;
1487     /* coupling with process to the left */
1488     for (i = xs; i < xs + s; i++) {
1489       for (j = 0; j < nc; j++) {
1490         cnt = 0;
1491         if (rank) {
1492           for (l = 0; l < s; l++) {
1493             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1494           }
1495         }
1496         if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1497           for (l = 0; l < s; l++) {
1498             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (m + i - s - l) * nc + ofill[k];
1499           }
1500         }
1501         if (dfill) {
1502           for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1503         } else {
1504           for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1505         }
1506         for (l = 0; l < s; l++) {
1507           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1508         }
1509         PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1510         row++;
1511       }
1512     }
1513     for (i = xs + s; i < xs + nx - s; i++) {
1514       for (j = 0; j < nc; j++) {
1515         cnt = 0;
1516         for (l = 0; l < s; l++) {
1517           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1518         }
1519         if (dfill) {
1520           for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1521         } else {
1522           for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1523         }
1524         for (l = 0; l < s; l++) {
1525           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1526         }
1527         PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1528         row++;
1529       }
1530     }
1531     /* coupling with process to the right */
1532     for (i = xs + nx - s; i < xs + nx; i++) {
1533       for (j = 0; j < nc; j++) {
1534         cnt = 0;
1535         for (l = 0; l < s; l++) {
1536           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1537         }
1538         if (dfill) {
1539           for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1540         } else {
1541           for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1542         }
1543         if (rank < size - 1) {
1544           for (l = 0; l < s; l++) {
1545             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1546           }
1547         }
1548         if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1549           for (l = 0; l < s; l++) {
1550             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s - l - m + 2) * nc + ofill[k];
1551           }
1552         }
1553         PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1554         row++;
1555       }
1556     }
1557     PetscCall(PetscFree(cols));
1558     /* do not copy values to GPU since they are all zero and not yet needed there */
1559     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1560     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1561     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1562     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1563     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1564   }
1565   PetscFunctionReturn(0);
1566 }
1567 
1568 /* ---------------------------------------------------------------------------------*/
1569 
1570 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da, Mat J) {
1571   PetscInt               xs, nx, i, i1, slot, gxs, gnx;
1572   PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1573   PetscInt               istart, iend;
1574   DMBoundaryType         bx;
1575   ISLocalToGlobalMapping ltog, mltog;
1576 
1577   PetscFunctionBegin;
1578   /*
1579          nc - number of components per grid point
1580          col - number of colors needed in one direction for single component problem
1581 
1582   */
1583   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1584   if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1585   col = 2 * s + 1;
1586 
1587   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1588   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1589 
1590   PetscCall(MatSetBlockSize(J, nc));
1591   PetscCall(MatSeqAIJSetPreallocation(J, col * nc, NULL));
1592   PetscCall(MatMPIAIJSetPreallocation(J, col * nc, NULL, col * nc, NULL));
1593 
1594   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1595   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1596   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1597 
1598   /*
1599     For each node in the grid: we get the neighbors in the local (on processor ordering
1600     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1601     PETSc ordering.
1602   */
1603   if (!da->prealloc_only) {
1604     PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
1605     for (i = xs; i < xs + nx; i++) {
1606       istart = PetscMax(-s, gxs - i);
1607       iend   = PetscMin(s, gxs + gnx - i - 1);
1608       slot   = i - gxs;
1609 
1610       cnt = 0;
1611       for (i1 = istart; i1 < iend + 1; i1++) {
1612         cols[cnt++] = nc * (slot + i1);
1613         for (l = 1; l < nc; l++) {
1614           cols[cnt] = 1 + cols[cnt - 1];
1615           cnt++;
1616         }
1617       }
1618       rows[0] = nc * (slot);
1619       for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1620       PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1621     }
1622     /* do not copy values to GPU since they are all zero and not yet needed there */
1623     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1624     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1625     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1626     if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1627     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1628     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1629     PetscCall(PetscFree2(rows, cols));
1630   }
1631   PetscFunctionReturn(0);
1632 }
1633 
1634 /* ---------------------------------------------------------------------------------*/
1635 
1636 PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da, Mat J) {
1637   PetscInt               xs, nx, i, i1, slot, gxs, gnx;
1638   PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1639   PetscInt               istart, iend;
1640   DMBoundaryType         bx;
1641   ISLocalToGlobalMapping ltog, mltog;
1642 
1643   PetscFunctionBegin;
1644   /*
1645          nc - number of components per grid point
1646          col - number of colors needed in one direction for single component problem
1647   */
1648   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1649   col = 2 * s + 1;
1650 
1651   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1652   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1653 
1654   PetscCall(MatSetBlockSize(J, nc));
1655   PetscCall(MatSeqAIJSetTotalPreallocation(J, nx * nc * col * nc));
1656 
1657   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1658   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1659   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1660 
1661   /*
1662     For each node in the grid: we get the neighbors in the local (on processor ordering
1663     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1664     PETSc ordering.
1665   */
1666   if (!da->prealloc_only) {
1667     PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
1668     for (i = xs; i < xs + nx; i++) {
1669       istart = PetscMax(-s, gxs - i);
1670       iend   = PetscMin(s, gxs + gnx - i - 1);
1671       slot   = i - gxs;
1672 
1673       cnt = 0;
1674       for (i1 = istart; i1 < iend + 1; i1++) {
1675         cols[cnt++] = nc * (slot + i1);
1676         for (l = 1; l < nc; l++) {
1677           cols[cnt] = 1 + cols[cnt - 1];
1678           cnt++;
1679         }
1680       }
1681       rows[0] = nc * (slot);
1682       for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1683       PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1684     }
1685     /* do not copy values to GPU since they are all zero and not yet needed there */
1686     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1687     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1688     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1689     if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1690     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1691     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1692     PetscCall(PetscFree2(rows, cols));
1693   }
1694   PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1695   PetscFunctionReturn(0);
1696 }
1697 
1698 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da, Mat J) {
1699   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1700   PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
1701   PetscInt               istart, iend, jstart, jend, ii, jj;
1702   MPI_Comm               comm;
1703   PetscScalar           *values;
1704   DMBoundaryType         bx, by;
1705   DMDAStencilType        st;
1706   ISLocalToGlobalMapping ltog;
1707 
1708   PetscFunctionBegin;
1709   /*
1710      nc - number of components per grid point
1711      col - number of colors needed in one direction for single component problem
1712   */
1713   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
1714   col = 2 * s + 1;
1715 
1716   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1717   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1718   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1719 
1720   PetscCall(PetscMalloc1(col * col * nc * nc, &cols));
1721 
1722   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1723 
1724   /* determine the matrix preallocation information */
1725   MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
1726   for (i = xs; i < xs + nx; i++) {
1727     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1728     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1729     for (j = ys; j < ys + ny; j++) {
1730       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1731       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1732       slot   = i - gxs + gnx * (j - gys);
1733 
1734       /* Find block columns in block row */
1735       cnt = 0;
1736       for (ii = istart; ii < iend + 1; ii++) {
1737         for (jj = jstart; jj < jend + 1; jj++) {
1738           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1739             cols[cnt++] = slot + ii + gnx * jj;
1740           }
1741         }
1742       }
1743       PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
1744     }
1745   }
1746   PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
1747   PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1748   MatPreallocateEnd(dnz, onz);
1749 
1750   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1751 
1752   /*
1753     For each node in the grid: we get the neighbors in the local (on processor ordering
1754     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1755     PETSc ordering.
1756   */
1757   if (!da->prealloc_only) {
1758     PetscCall(PetscCalloc1(col * col * nc * nc, &values));
1759     for (i = xs; i < xs + nx; i++) {
1760       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1761       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1762       for (j = ys; j < ys + ny; j++) {
1763         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1764         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1765         slot   = i - gxs + gnx * (j - gys);
1766         cnt    = 0;
1767         for (ii = istart; ii < iend + 1; ii++) {
1768           for (jj = jstart; jj < jend + 1; jj++) {
1769             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1770               cols[cnt++] = slot + ii + gnx * jj;
1771             }
1772           }
1773         }
1774         PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
1775       }
1776     }
1777     PetscCall(PetscFree(values));
1778     /* do not copy values to GPU since they are all zero and not yet needed there */
1779     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1780     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1781     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1782     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1783     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1784   }
1785   PetscCall(PetscFree(cols));
1786   PetscFunctionReturn(0);
1787 }
1788 
1789 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da, Mat J) {
1790   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1791   PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
1792   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
1793   MPI_Comm               comm;
1794   PetscScalar           *values;
1795   DMBoundaryType         bx, by, bz;
1796   DMDAStencilType        st;
1797   ISLocalToGlobalMapping ltog;
1798 
1799   PetscFunctionBegin;
1800   /*
1801          nc - number of components per grid point
1802          col - number of colors needed in one direction for single component problem
1803 
1804   */
1805   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
1806   col = 2 * s + 1;
1807 
1808   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
1809   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
1810   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1811 
1812   PetscCall(PetscMalloc1(col * col * col, &cols));
1813 
1814   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1815 
1816   /* determine the matrix preallocation information */
1817   MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
1818   for (i = xs; i < xs + nx; i++) {
1819     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1820     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1821     for (j = ys; j < ys + ny; j++) {
1822       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1823       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1824       for (k = zs; k < zs + nz; k++) {
1825         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1826         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1827 
1828         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1829 
1830         /* Find block columns in block row */
1831         cnt = 0;
1832         for (ii = istart; ii < iend + 1; ii++) {
1833           for (jj = jstart; jj < jend + 1; jj++) {
1834             for (kk = kstart; kk < kend + 1; kk++) {
1835               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1836                 cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
1837               }
1838             }
1839           }
1840         }
1841         PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
1842       }
1843     }
1844   }
1845   PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
1846   PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1847   MatPreallocateEnd(dnz, onz);
1848 
1849   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1850 
1851   /*
1852     For each node in the grid: we get the neighbors in the local (on processor ordering
1853     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1854     PETSc ordering.
1855   */
1856   if (!da->prealloc_only) {
1857     PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
1858     for (i = xs; i < xs + nx; i++) {
1859       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1860       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1861       for (j = ys; j < ys + ny; j++) {
1862         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1863         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1864         for (k = zs; k < zs + nz; k++) {
1865           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1866           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1867 
1868           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1869 
1870           cnt = 0;
1871           for (ii = istart; ii < iend + 1; ii++) {
1872             for (jj = jstart; jj < jend + 1; jj++) {
1873               for (kk = kstart; kk < kend + 1; kk++) {
1874                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1875                   cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
1876                 }
1877               }
1878             }
1879           }
1880           PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
1881         }
1882       }
1883     }
1884     PetscCall(PetscFree(values));
1885     /* do not copy values to GPU since they are all zero and not yet needed there */
1886     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1887     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1888     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1889     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1890     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1891   }
1892   PetscCall(PetscFree(cols));
1893   PetscFunctionReturn(0);
1894 }
1895 
1896 /*
1897   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
1898   identify in the local ordering with periodic domain.
1899 */
1900 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog, PetscInt *row, PetscInt *cnt, PetscInt col[]) {
1901   PetscInt i, n;
1902 
1903   PetscFunctionBegin;
1904   PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, 1, row, row));
1905   PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, *cnt, col, col));
1906   for (i = 0, n = 0; i < *cnt; i++) {
1907     if (col[i] >= *row) col[n++] = col[i];
1908   }
1909   *cnt = n;
1910   PetscFunctionReturn(0);
1911 }
1912 
1913 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da, Mat J) {
1914   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1915   PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
1916   PetscInt               istart, iend, jstart, jend, ii, jj;
1917   MPI_Comm               comm;
1918   PetscScalar           *values;
1919   DMBoundaryType         bx, by;
1920   DMDAStencilType        st;
1921   ISLocalToGlobalMapping ltog;
1922 
1923   PetscFunctionBegin;
1924   /*
1925      nc - number of components per grid point
1926      col - number of colors needed in one direction for single component problem
1927   */
1928   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
1929   col = 2 * s + 1;
1930 
1931   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1932   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1933   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1934 
1935   PetscCall(PetscMalloc1(col * col * nc * nc, &cols));
1936 
1937   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1938 
1939   /* determine the matrix preallocation information */
1940   MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
1941   for (i = xs; i < xs + nx; i++) {
1942     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1943     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1944     for (j = ys; j < ys + ny; j++) {
1945       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1946       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1947       slot   = i - gxs + gnx * (j - gys);
1948 
1949       /* Find block columns in block row */
1950       cnt = 0;
1951       for (ii = istart; ii < iend + 1; ii++) {
1952         for (jj = jstart; jj < jend + 1; jj++) {
1953           if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
1954         }
1955       }
1956       PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
1957       PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
1958     }
1959   }
1960   PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
1961   PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1962   MatPreallocateEnd(dnz, onz);
1963 
1964   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1965 
1966   /*
1967     For each node in the grid: we get the neighbors in the local (on processor ordering
1968     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1969     PETSc ordering.
1970   */
1971   if (!da->prealloc_only) {
1972     PetscCall(PetscCalloc1(col * col * nc * nc, &values));
1973     for (i = xs; i < xs + nx; i++) {
1974       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1975       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1976       for (j = ys; j < ys + ny; j++) {
1977         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1978         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1979         slot   = i - gxs + gnx * (j - gys);
1980 
1981         /* Find block columns in block row */
1982         cnt = 0;
1983         for (ii = istart; ii < iend + 1; ii++) {
1984           for (jj = jstart; jj < jend + 1; jj++) {
1985             if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
1986           }
1987         }
1988         PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
1989         PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
1990       }
1991     }
1992     PetscCall(PetscFree(values));
1993     /* do not copy values to GPU since they are all zero and not yet needed there */
1994     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1995     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1996     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1997     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1998     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1999   }
2000   PetscCall(PetscFree(cols));
2001   PetscFunctionReturn(0);
2002 }
2003 
2004 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da, Mat J) {
2005   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
2006   PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
2007   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
2008   MPI_Comm               comm;
2009   PetscScalar           *values;
2010   DMBoundaryType         bx, by, bz;
2011   DMDAStencilType        st;
2012   ISLocalToGlobalMapping ltog;
2013 
2014   PetscFunctionBegin;
2015   /*
2016      nc - number of components per grid point
2017      col - number of colors needed in one direction for single component problem
2018   */
2019   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
2020   col = 2 * s + 1;
2021 
2022   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
2023   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
2024   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
2025 
2026   /* create the matrix */
2027   PetscCall(PetscMalloc1(col * col * col, &cols));
2028 
2029   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
2030 
2031   /* determine the matrix preallocation information */
2032   MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
2033   for (i = xs; i < xs + nx; i++) {
2034     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2035     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2036     for (j = ys; j < ys + ny; j++) {
2037       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2038       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2039       for (k = zs; k < zs + nz; k++) {
2040         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2041         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2042 
2043         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2044 
2045         /* Find block columns in block row */
2046         cnt = 0;
2047         for (ii = istart; ii < iend + 1; ii++) {
2048           for (jj = jstart; jj < jend + 1; jj++) {
2049             for (kk = kstart; kk < kend + 1; kk++) {
2050               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
2051             }
2052           }
2053         }
2054         PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
2055         PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
2056       }
2057     }
2058   }
2059   PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
2060   PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
2061   MatPreallocateEnd(dnz, onz);
2062 
2063   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
2064 
2065   /*
2066     For each node in the grid: we get the neighbors in the local (on processor ordering
2067     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2068     PETSc ordering.
2069   */
2070   if (!da->prealloc_only) {
2071     PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
2072     for (i = xs; i < xs + nx; i++) {
2073       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2074       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2075       for (j = ys; j < ys + ny; j++) {
2076         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2077         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2078         for (k = zs; k < zs + nz; k++) {
2079           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2080           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2081 
2082           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2083 
2084           cnt = 0;
2085           for (ii = istart; ii < iend + 1; ii++) {
2086             for (jj = jstart; jj < jend + 1; jj++) {
2087               for (kk = kstart; kk < kend + 1; kk++) {
2088                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
2089               }
2090             }
2091           }
2092           PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
2093           PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
2094         }
2095       }
2096     }
2097     PetscCall(PetscFree(values));
2098     /* do not copy values to GPU since they are all zero and not yet needed there */
2099     PetscCall(MatBindToCPU(J, PETSC_TRUE));
2100     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
2101     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
2102     PetscCall(MatBindToCPU(J, PETSC_FALSE));
2103     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2104   }
2105   PetscCall(PetscFree(cols));
2106   PetscFunctionReturn(0);
2107 }
2108 
2109 /* ---------------------------------------------------------------------------------*/
2110 
2111 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da, Mat J) {
2112   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
2113   PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, *dnz, *onz;
2114   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
2115   DM_DA                 *dd = (DM_DA *)da->data;
2116   PetscInt               ifill_col, *dfill = dd->dfill, *ofill = dd->ofill;
2117   MPI_Comm               comm;
2118   PetscScalar           *values;
2119   DMBoundaryType         bx, by, bz;
2120   ISLocalToGlobalMapping ltog;
2121   DMDAStencilType        st;
2122   PetscBool              removedups = PETSC_FALSE;
2123 
2124   PetscFunctionBegin;
2125   /*
2126          nc - number of components per grid point
2127          col - number of colors needed in one direction for single component problem
2128 
2129   */
2130   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
2131   col = 2 * s + 1;
2132   PetscCheck(bx != DM_BOUNDARY_PERIODIC || (m % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in X is divisible\n\
2133                  by 2*stencil_width + 1\n");
2134   PetscCheck(by != DM_BOUNDARY_PERIODIC || (n % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Y is divisible\n\
2135                  by 2*stencil_width + 1\n");
2136   PetscCheck(bz != DM_BOUNDARY_PERIODIC || (p % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Z is divisible\n\
2137                  by 2*stencil_width + 1\n");
2138 
2139   /*
2140        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
2141        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
2142   */
2143   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
2144   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
2145   if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;
2146 
2147   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
2148   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
2149   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
2150 
2151   PetscCall(PetscMalloc1(col * col * col * nc, &cols));
2152   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
2153 
2154   /* determine the matrix preallocation information */
2155   MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
2156 
2157   PetscCall(MatSetBlockSize(J, nc));
2158   for (i = xs; i < xs + nx; i++) {
2159     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2160     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2161     for (j = ys; j < ys + ny; j++) {
2162       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2163       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2164       for (k = zs; k < zs + nz; k++) {
2165         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2166         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2167 
2168         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2169 
2170         for (l = 0; l < nc; l++) {
2171           cnt = 0;
2172           for (ii = istart; ii < iend + 1; ii++) {
2173             for (jj = jstart; jj < jend + 1; jj++) {
2174               for (kk = kstart; kk < kend + 1; kk++) {
2175                 if (ii || jj || kk) {
2176                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
2177                     for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2178                   }
2179                 } else {
2180                   if (dfill) {
2181                     for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2182                   } else {
2183                     for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2184                   }
2185                 }
2186               }
2187             }
2188           }
2189           row    = l + nc * (slot);
2190           maxcnt = PetscMax(maxcnt, cnt);
2191           if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
2192           else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
2193         }
2194       }
2195     }
2196   }
2197   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
2198   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
2199   MatPreallocateEnd(dnz, onz);
2200   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
2201 
2202   /*
2203     For each node in the grid: we get the neighbors in the local (on processor ordering
2204     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2205     PETSc ordering.
2206   */
2207   if (!da->prealloc_only) {
2208     PetscCall(PetscCalloc1(maxcnt, &values));
2209     for (i = xs; i < xs + nx; i++) {
2210       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2211       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2212       for (j = ys; j < ys + ny; j++) {
2213         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2214         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2215         for (k = zs; k < zs + nz; k++) {
2216           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2217           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2218 
2219           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2220 
2221           for (l = 0; l < nc; l++) {
2222             cnt = 0;
2223             for (ii = istart; ii < iend + 1; ii++) {
2224               for (jj = jstart; jj < jend + 1; jj++) {
2225                 for (kk = kstart; kk < kend + 1; kk++) {
2226                   if (ii || jj || kk) {
2227                     if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
2228                       for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2229                     }
2230                   } else {
2231                     if (dfill) {
2232                       for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2233                     } else {
2234                       for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2235                     }
2236                   }
2237                 }
2238               }
2239             }
2240             row = l + nc * (slot);
2241             PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, values, INSERT_VALUES));
2242           }
2243         }
2244       }
2245     }
2246     PetscCall(PetscFree(values));
2247     /* do not copy values to GPU since they are all zero and not yet needed there */
2248     PetscCall(MatBindToCPU(J, PETSC_TRUE));
2249     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
2250     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
2251     PetscCall(MatBindToCPU(J, PETSC_FALSE));
2252     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2253   }
2254   PetscCall(PetscFree(cols));
2255   PetscFunctionReturn(0);
2256 }
2257