xref: /petsc/src/dm/impls/da/fdda.c (revision 2ddab442c9dceb8b6d5d16746146d2202e1fcc42)
1 
2 #include <petsc/private/dmdaimpl.h> /*I      "petscdmda.h"     I*/
3 #include <petscmat.h>
4 #include <petscbt.h>
5 
6 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM, ISColoringType, ISColoring *);
7 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM, ISColoringType, ISColoring *);
8 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM, ISColoringType, ISColoring *);
9 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM, ISColoringType, ISColoring *);
10 
/*
   For a ghost index i that may be negative or greater than the upper bound m,
  maps it into the 0:m-1 range using periodicity.

  Note: arguments are fully parenthesized so that expression arguments expand
  correctly, but each argument may be evaluated more than once — do not pass
  expressions with side effects (e.g. i++).
*/
#define SetInRange(i, m) (((i) < 0) ? (m) + (i) : (((i) >= (m)) ? (i) - (m) : (i)))
16 
17 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill, PetscInt w, PetscInt **rfill)
18 {
19   PetscInt i, j, nz, *fill;
20 
21   PetscFunctionBegin;
22   if (!dfill) PetscFunctionReturn(PETSC_SUCCESS);
23 
24   /* count number nonzeros */
25   nz = 0;
26   for (i = 0; i < w; i++) {
27     for (j = 0; j < w; j++) {
28       if (dfill[w * i + j]) nz++;
29     }
30   }
31   PetscCall(PetscMalloc1(nz + w + 1, &fill));
32   /* construct modified CSR storage of nonzero structure */
33   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
34    so fill[1] - fill[0] gives number of nonzeros in first row etc */
35   nz = w + 1;
36   for (i = 0; i < w; i++) {
37     fill[i] = nz;
38     for (j = 0; j < w; j++) {
39       if (dfill[w * i + j]) {
40         fill[nz] = j;
41         nz++;
42       }
43     }
44   }
45   fill[w] = nz;
46 
47   *rfill = fill;
48   PetscFunctionReturn(PETSC_SUCCESS);
49 }
50 
51 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse, PetscInt w, PetscInt **rfill)
52 {
53   PetscInt nz;
54 
55   PetscFunctionBegin;
56   if (!dfillsparse) PetscFunctionReturn(PETSC_SUCCESS);
57 
58   /* Determine number of non-zeros */
59   nz = (dfillsparse[w] - w - 1);
60 
61   /* Allocate space for our copy of the given sparse matrix representation. */
62   PetscCall(PetscMalloc1(nz + w + 1, rfill));
63   PetscCall(PetscArraycpy(*rfill, dfillsparse, nz + w + 1));
64   PetscFunctionReturn(PETSC_SUCCESS);
65 }
66 
67 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
68 {
69   PetscInt i, k, cnt = 1;
70 
71   PetscFunctionBegin;
72 
73   /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of
74    columns to the left with any nonzeros in them plus 1 */
75   PetscCall(PetscCalloc1(dd->w, &dd->ofillcols));
76   for (i = 0; i < dd->w; i++) {
77     for (k = dd->ofill[i]; k < dd->ofill[i + 1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
78   }
79   for (i = 0; i < dd->w; i++) {
80     if (dd->ofillcols[i]) dd->ofillcols[i] = cnt++;
81   }
82   PetscFunctionReturn(PETSC_SUCCESS);
83 }
84 
/*@
    DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
    of the matrix returned by `DMCreateMatrix()`.

    Logically Collective on da

    Input Parameters:
+   da - the distributed array
.   dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
-   ofill - the fill pattern in the off-diagonal blocks

    Level: developer

    Notes:
    This only makes sense when you are doing multicomponent problems but using the
       `MATMPIAIJ` matrix format

           The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
       representing coupling and 0 entries for missing coupling. For example
.vb
            dfill[9] = {1, 0, 0,
                        1, 1, 0,
                        0, 1, 1}
.ve
       means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
       itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
       diagonal block).

     `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns than
     can be represented in the dfill, ofill format

   Contributed by Glenn Hammond

.seealso: `DM`, `DMDA`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
@*/
PetscErrorCode DMDASetBlockFills(DM da, const PetscInt *dfill, const PetscInt *ofill)
{
  DM_DA *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /* save the given dfill and ofill information, converted to sparse (CSR-like) form */
  PetscCall(DMDASetBlockFills_Private(dfill, dd->w, &dd->dfill));
  PetscCall(DMDASetBlockFills_Private(ofill, dd->w, &dd->ofill));

  /* count nonzeros in ofill columns (builds dd->ofillcols) */
  PetscCall(DMDASetBlockFills_Private2(dd));
  PetscFunctionReturn(PETSC_SUCCESS);
}
133 
/*@
    DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
    of the matrix returned by `DMCreateMatrix()`, using sparse representations
    of fill patterns.

    Logically Collective on da

    Input Parameters:
+   da - the distributed array
.   dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block)
-   ofill - the sparse fill pattern in the off-diagonal blocks

    Level: developer

    Notes:
    This only makes sense when you are doing multicomponent problems but using the
       `MATMPIAIJ` matrix format

           The format for dfill and ofill is a sparse representation of a
           dof-by-dof matrix with 1 entries representing coupling and 0 entries
           for missing coupling.  The sparse representation is a 1 dimensional
           array of length nz + dof + 1, where nz is the number of non-zeros in
           the matrix.  The first dof entries in the array give the
           starting array indices of each row's items in the rest of the array,
           the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
           and the remaining nz items give the column indices of each of
           the 1s within the logical 2D matrix.  Each row's items within
           the array are the column indices of the 1s within that row
           of the 2D matrix.  PETSc developers may recognize that this is the
           same format as that computed by the `DMDASetBlockFills_Private()`
           function from a dense 2D matrix representation.

     `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns than
     can be represented in the dfill, ofill format

   Contributed by Philip C. Roth

.seealso: `DM`, `DMDA`, `DMDASetBlockFills()`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
@*/
PetscErrorCode DMDASetBlockFillsSparse(DM da, const PetscInt *dfillsparse, const PetscInt *ofillsparse)
{
  DM_DA *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /* save the given dfill and ofill information (already sparse, just copied) */
  PetscCall(DMDASetBlockFillsSparse_Private(dfillsparse, dd->w, &dd->dfill));
  PetscCall(DMDASetBlockFillsSparse_Private(ofillsparse, dd->w, &dd->ofill));

  /* count nonzeros in ofill columns (builds dd->ofillcols) */
  PetscCall(DMDASetBlockFills_Private2(dd));
  PetscFunctionReturn(PETSC_SUCCESS);
}
186 
/*
  DMCreateColoring_DA - Dispatches coloring creation to the dimension-specific
  routines below, after normalizing the coloring type and (for BAIJ formats)
  temporarily switching the DMDA to one degree of freedom per point.
*/
PetscErrorCode DMCreateColoring_DA(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt       dim, m, n, p, nc;
  DMBoundaryType bx, by, bz;
  MPI_Comm       comm;
  PetscMPIInt    size;
  PetscBool      isBAIJ;
  DM_DA         *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
                                  m
          ------------------------------------------------------
         |                                                     |
         |                                                     |
         |               ----------------------                |
         |               |                    |                |
      n  |           yn  |                    |                |
         |               |                    |                |
         |               .---------------------                |
         |             (xs,ys)     xn                          |
         |            .                                        |
         |         (gxs,gys)                                   |
         |                                                     |
          -----------------------------------------------------
  */

  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  PetscCall(DMDAGetInfo(da, &dim, NULL, NULL, NULL, &m, &n, &p, &nc, NULL, &bx, &by, &bz, NULL));

  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (ctype == IS_COLORING_LOCAL) {
    if (size == 1) {
      /* on one process a ghosted (local) coloring is equivalent to a global one */
      ctype = IS_COLORING_GLOBAL;
    } else {
      /* a process owning both periodic ends would alias ghost points onto owned points */
      PetscCheck((dim == 1) || !((m == 1 && bx == DM_BOUNDARY_PERIODIC) || (n == 1 && by == DM_BOUNDARY_PERIODIC) || (p == 1 && bz == DM_BOUNDARY_PERIODIC)), PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain on the same process");
    }
  }

  /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
     matrices is for the blocks, not the individual matrix elements  */
  PetscCall(PetscStrbeginswith(da->mattype, MATBAIJ, &isBAIJ));
  if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATMPIBAIJ, &isBAIJ));
  if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATSEQBAIJ, &isBAIJ));
  if (isBAIJ) {
    /* temporarily rescale the DMDA's dof and x-extents; restored below after the dispatch */
    dd->w  = 1;
    dd->xs = dd->xs / nc;
    dd->xe = dd->xe / nc;
    dd->Xs = dd->Xs / nc;
    dd->Xe = dd->Xe / nc;
  }

  /*
     We do not provide a getcoloring function in the DMDA operations because
   the basic DMDA does not know about matrices. We think of DMDA as being
   more low-level than matrices.
  */
  if (dim == 1) PetscCall(DMCreateColoring_DA_1d_MPIAIJ(da, ctype, coloring));
  else if (dim == 2) PetscCall(DMCreateColoring_DA_2d_MPIAIJ(da, ctype, coloring));
  else if (dim == 3) PetscCall(DMCreateColoring_DA_3d_MPIAIJ(da, ctype, coloring));
  else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not done for %" PetscInt_FMT " dimension, send us mail petsc-maint@mcs.anl.gov for code", dim);
  if (isBAIJ) {
    /* undo the temporary rescaling done above */
    dd->w  = nc;
    dd->xs = dd->xs * nc;
    dd->xe = dd->xe * nc;
    dd->Xs = dd->Xs * nc;
    dd->Xe = dd->Xe * nc;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
262 
263 /* ---------------------------------------------------------------------------------*/
264 
/*
  DMCreateColoring_DA_2d_MPIAIJ - Creates a coloring for a 2d DMDA by tiling the
  grid with col-by-col tiles (col = 2*stencil_width + 1) so that any two points
  within stencil distance of each other receive different colors. Colorings are
  cached on the DMDA and reused on subsequent calls.
*/
PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, M, N, dim, s, k, nc, col;
  PetscInt         ncolors = 0;
  MPI_Comm         comm;
  DMBoundaryType   bx, by;
  DMDAStencilType  st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* special case as taught to us by Paul Hovland: a 5-point star stencil needs
     only 5 colors per component instead of the 9 a box tiling would use */
  if (st == DMDA_STENCIL_STAR && s == 1) {
    PetscCall(DMCreateColoring_DA_2d_5pt_MPIAIJ(da, ctype, coloring));
  } else {
    if (ctype == IS_COLORING_GLOBAL) {
      if (!dd->localcoloring) { /* build once; cached on the DMDA */
        PetscCall(PetscMalloc1(nc * nx * ny, &colors));
        ii = 0;
        /* color = component + nc * (position of the point within its col-by-col tile) */
        for (j = ys; j < ys + ny; j++) {
          for (i = xs; i < xs + nx; i++) {
            for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((i % col) + col * (j % col));
          }
        }
        ncolors = nc + nc * (col - 1 + col * (col - 1)); /* equals nc*col*col, i.e. max color + 1 */
        PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
      }
      *coloring = dd->localcoloring;
    } else if (ctype == IS_COLORING_LOCAL) {
      if (!dd->ghostedcoloring) {
        PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
        ii = 0;
        for (j = gys; j < gys + gny; j++) {
          for (i = gxs; i < gxs + gnx; i++) {
            for (k = 0; k < nc; k++) {
              /* the complicated stuff is to handle periodic boundaries: ghost
                 indices outside 0..m-1 (or 0..n-1) are wrapped back into range */
              colors[ii++] = k + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col));
            }
          }
        }
        ncolors = nc + nc * (col - 1 + col * (col - 1));
        PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
        /* PetscIntView(ncolors,(PetscInt*)colors,0); */

        PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
      }
      *coloring = dd->ghostedcoloring;
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  }
  /* bump the reference count since the caller receives a cached object */
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(PETSC_SUCCESS);
}
328 
329 /* ---------------------------------------------------------------------------------*/
330 
/*
  DMCreateColoring_DA_3d_MPIAIJ - 3d analogue of the 2d routine above: tiles the
  grid with col^3 tiles (col = 2*stencil_width + 1) so points within stencil
  distance get distinct colors. Colorings are cached on the DMDA.
*/
PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, gxs, gys, gnx, gny, m, n, p, dim, s, k, nc, col, zs, gzs, ii, l, nz, gnz, M, N, P;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx, by, bz;
  DMDAStencilType  st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) { /* build once; cached on the DMDA */
      PetscCall(PetscMalloc1(nc * nx * ny * nz, &colors));
      ii = 0;
      /* color = component + nc * (position of the point within its col^3 tile) */
      for (k = zs; k < zs + nz; k++) {
        for (j = ys; j < ys + ny; j++) {
          for (i = xs; i < xs + nx; i++) {
            for (l = 0; l < nc; l++) colors[ii++] = l + nc * ((i % col) + col * (j % col) + col * col * (k % col));
          }
        }
      }
      ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1)); /* equals nc*col^3 */
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny * nz, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx * gny * gnz, &colors));
      ii = 0;
      for (k = gzs; k < gzs + gnz; k++) {
        for (j = gys; j < gys + gny; j++) {
          for (i = gxs; i < gxs + gnx; i++) {
            for (l = 0; l < nc; l++) {
              /* the complicated stuff is to handle periodic boundaries: ghost
                 indices outside the owned range are wrapped back into 0..m-1 etc. */
              colors[ii++] = l + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col) + col * col * (SetInRange(k, p) % col));
            }
          }
        }
      }
      ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny * gnz, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  /* bump the reference count since the caller receives a cached object */
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(PETSC_SUCCESS);
}
391 
392 /* ---------------------------------------------------------------------------------*/
393 
/*
  DMCreateColoring_DA_1d_MPIAIJ - 1d coloring. When DMDASetBlockFills() has
  provided off-diagonal fill information (dd->ofillcols), a tighter coloring is
  built that only distinguishes components that actually couple off-point.
  Colorings are cached on the DMDA.
*/
PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, nx, i, i1, gxs, gnx, l, m, M, dim, s, nc, col;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, &M, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) { /* build once; cached on the DMDA */
      PetscCall(PetscMalloc1(nc * nx, &colors));
      if (dd->ofillcols) {
        /* tc = number of components that have any off-point coupling */
        PetscInt tc = 0;
        for (i = 0; i < nc; i++) tc += (PetscInt)(dd->ofillcols[i] > 0);
        i1 = 0;
        for (i = xs; i < xs + nx; i++) {
          for (l = 0; l < nc; l++) {
            if (dd->ofillcols[l] && (i % col)) {
              /* component couples off-point and the point is not at a tile origin:
                 give it one of the extra colors reserved for each stencil offset.
                 NOTE(review): relies on ofillcols[] being the 1..tc ranks assigned
                 by DMDASetBlockFills_Private2() — confirm if that routine changes */
              colors[i1++] = nc - 1 + tc * ((i % col) - 1) + dd->ofillcols[l];
            } else {
              /* no off-point coupling (or tile origin): component index suffices */
              colors[i1++] = l;
            }
          }
        }
        ncolors = nc + 2 * s * tc; /* base colors plus tc extras per stencil offset */
      } else {
        i1 = 0;
        /* dense fill: tile the line with col-wide tiles, nc colors per position */
        for (i = xs; i < xs + nx; i++) {
          for (l = 0; l < nc; l++) colors[i1++] = l + nc * (i % col);
        }
        ncolors = nc + nc * (col - 1); /* equals nc*col */
      }
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx, &colors));
      i1 = 0;
      for (i = gxs; i < gxs + gnx; i++) {
        for (l = 0; l < nc; l++) {
          /* the complicated stuff is to handle periodic boundaries: wrap ghost
             indices back into 0..m-1 before tiling */
          colors[i1++] = l + nc * (SetInRange(i, m) % col);
        }
      }
      ncolors = nc + nc * (col - 1);
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  /* bump the reference count since the caller receives a cached object */
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(PETSC_SUCCESS);
}
461 
/*
  DMCreateColoring_DA_2d_5pt_MPIAIJ - Optimal 5-coloring (per component) for a
  2d 5-point star stencil: color = (3*j + i) mod 5, which gives distinct colors
  to a point and its four neighbors. Colorings are cached on the DMDA.

  NOTE(review): unlike the other coloring routines in this file, this one does
  not call ISColoringReference() before returning; when reached through
  DMCreateColoring_DA_2d_MPIAIJ() the caller references on its behalf — confirm
  any direct callers do the same.
*/
PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, dim, s, k, nc;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx, by;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, NULL));
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) { /* build once; cached on the DMDA */
      PetscCall(PetscMalloc1(nc * nx * ny, &colors));
      ii = 0;
      /* color = component + nc * ((3j + i) mod 5) */
      for (j = ys; j < ys + ny; j++) {
        for (i = xs; i < xs + nx; i++) {
          for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * j + i) % 5);
        }
      }
      ncolors = 5 * nc;
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
      ii = 0;
      /* wrap ghost indices into range before applying the 5-coloring (periodic boundaries) */
      for (j = gys; j < gys + gny; j++) {
        for (i = gxs; i < gxs + gnx; i++) {
          for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * SetInRange(j, n) + SetInRange(i, m)) % 5);
        }
      }
      ncolors = 5 * nc;
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  PetscFunctionReturn(PETSC_SUCCESS);
}
511 
512 /* =========================================================================== */
513 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM, Mat);
514 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM, Mat);
515 extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM, Mat);
516 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM, Mat);
517 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM, Mat);
518 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM, Mat);
519 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM, Mat);
520 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM, Mat);
521 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM, Mat);
522 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM, Mat);
523 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM, Mat);
524 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM, Mat);
525 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM, Mat);
526 extern PetscErrorCode DMCreateMatrix_DA_IS(DM, Mat);
527 
/*@C
   MatSetupDM - Sets the `DMDA` that is to be used by the HYPRE_StructMatrix PETSc matrix

   Logically Collective on mat

   Input Parameters:
+  mat - the matrix
-  da - the `DMDA` the matrix will use

   Level: intermediate

.seealso: `Mat`, `MatSetUp()`
@*/
PetscErrorCode MatSetupDM(Mat mat, DM da)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscValidHeaderSpecificType(da, DM_CLASSID, 2, DMDA);
  /* delegates to the matrix type's implementation if it provides one; a no-op otherwise */
  PetscTryMethod(mat, "MatSetupDM_C", (Mat, DM), (mat, da));
  PetscFunctionReturn(PETSC_SUCCESS);
}
549 
/*
  MatView_MPI_DA - Views a DMDA-generated matrix in the DMDA's natural ordering
  rather than the PETSc (parallel-distribution) ordering, by permuting the
  matrix with the DMDA's application ordering before calling MatView().
*/
PetscErrorCode MatView_MPI_DA(Mat A, PetscViewer viewer)
{
  DM                da;
  const char       *prefix;
  Mat               Anatural;
  AO                ao;
  PetscInt          rstart, rend, *petsc, i;
  IS                is;
  MPI_Comm          comm;
  PetscViewerFormat format;

  PetscFunctionBegin;
  /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
  PetscCall(PetscViewerGetFormat(viewer, &format));
  if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(MatGetDM(A, &da));
  PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");

  /* map this process's row range through the AO to natural ordering;
     the petsc array's ownership passes to the IS via PETSC_OWN_POINTER */
  PetscCall(DMDAGetAO(da, &ao));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
  PetscCall(PetscMalloc1(rend - rstart, &petsc));
  for (i = rstart; i < rend; i++) petsc[i - rstart] = i;
  PetscCall(AOApplicationToPetsc(ao, rend - rstart, petsc));
  PetscCall(ISCreateGeneral(comm, rend - rstart, petsc, PETSC_OWN_POINTER, &is));

  /* call viewer on natural ordering */
  PetscCall(MatCreateSubMatrix(A, is, is, MAT_INITIAL_MATRIX, &Anatural));
  PetscCall(ISDestroy(&is));
  PetscCall(PetscObjectGetOptionsPrefix((PetscObject)A, &prefix));
  PetscCall(PetscObjectSetOptionsPrefix((PetscObject)Anatural, prefix));
  PetscCall(PetscObjectSetName((PetscObject)Anatural, ((PetscObject)A)->name));
  /* suppress the class-name header so the output looks like it came from A itself */
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
  PetscCall(MatView(Anatural, viewer));
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
  PetscCall(MatDestroy(&Anatural));
  PetscFunctionReturn(PETSC_SUCCESS);
}
589 
/*
  MatLoad_MPI_DA - Loads a DMDA-generated matrix that was stored in the DMDA's
  natural ordering (the inverse of MatView_MPI_DA above): the data is read into
  a temporary matrix, permuted into the application ordering, and then swapped
  into A via MatHeaderReplace().
*/
PetscErrorCode MatLoad_MPI_DA(Mat A, PetscViewer viewer)
{
  DM       da;
  Mat      Anatural, Aapp;
  AO       ao;
  PetscInt rstart, rend, *app, i, m, n, M, N;
  IS       is;
  MPI_Comm comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(MatGetDM(A, &da));
  PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");

  /* Load the matrix in natural ordering, mirroring A's type and layout */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Anatural));
  PetscCall(MatSetType(Anatural, ((PetscObject)A)->type_name));
  PetscCall(MatGetSize(A, &M, &N));
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(MatSetSizes(Anatural, m, n, M, N));
  PetscCall(MatLoad(Anatural, viewer));

  /* Map natural ordering to application ordering and create IS;
     ownership of app passes to the IS via PETSC_OWN_POINTER */
  PetscCall(DMDAGetAO(da, &ao));
  PetscCall(MatGetOwnershipRange(Anatural, &rstart, &rend));
  PetscCall(PetscMalloc1(rend - rstart, &app));
  for (i = rstart; i < rend; i++) app[i - rstart] = i;
  PetscCall(AOPetscToApplication(ao, rend - rstart, app));
  PetscCall(ISCreateGeneral(comm, rend - rstart, app, PETSC_OWN_POINTER, &is));

  /* Do permutation and replace header: A takes over Aapp's data in place */
  PetscCall(MatCreateSubMatrix(Anatural, is, is, MAT_INITIAL_MATRIX, &Aapp));
  PetscCall(MatHeaderReplace(A, &Aapp));
  PetscCall(ISDestroy(&is));
  PetscCall(MatDestroy(&Anatural));
  PetscFunctionReturn(PETSC_SUCCESS);
}
627 
628 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
629 {
630   PetscInt    dim, dof, nx, ny, nz, dims[3], starts[3], M, N, P;
631   Mat         A;
632   MPI_Comm    comm;
633   MatType     Atype;
634   MatType     mtype;
635   PetscMPIInt size;
636   DM_DA      *dd    = (DM_DA *)da->data;
637   void (*aij)(void) = NULL, (*baij)(void) = NULL, (*sbaij)(void) = NULL, (*sell)(void) = NULL, (*is)(void) = NULL;
638 
639   PetscFunctionBegin;
640   PetscCall(MatInitializePackage());
641   mtype = da->mattype;
642 
643   /*
644                                   m
645           ------------------------------------------------------
646          |                                                     |
647          |                                                     |
648          |               ----------------------                |
649          |               |                    |                |
650       n  |           ny  |                    |                |
651          |               |                    |                |
652          |               .---------------------                |
653          |             (xs,ys)     nx                          |
654          |            .                                        |
655          |         (gxs,gys)                                   |
656          |                                                     |
657           -----------------------------------------------------
658   */
659 
660   /*
661          nc - number of components per grid point
662          col - number of colors needed in one direction for single component problem
663   */
664   M   = dd->M;
665   N   = dd->N;
666   P   = dd->P;
667   dim = da->dim;
668   dof = dd->w;
669   /* PetscCall(DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); */
670   PetscCall(DMDAGetCorners(da, NULL, NULL, NULL, &nx, &ny, &nz));
671   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
672   PetscCall(MatCreate(comm, &A));
673   PetscCall(MatSetSizes(A, dof * nx * ny * nz, dof * nx * ny * nz, dof * M * N * P, dof * M * N * P));
674   PetscCall(MatSetType(A, mtype));
675   PetscCall(MatSetFromOptions(A));
676   if (dof * nx * ny * nz < da->bind_below) {
677     PetscCall(MatSetBindingPropagates(A, PETSC_TRUE));
678     PetscCall(MatBindToCPU(A, PETSC_TRUE));
679   }
680   PetscCall(MatSetDM(A, da));
681   if (da->structure_only) PetscCall(MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE));
682   PetscCall(MatGetType(A, &Atype));
683   /*
684      We do not provide a getmatrix function in the DMDA operations because
685    the basic DMDA does not know about matrices. We think of DMDA as being more
686    more low-level than matrices. This is kind of cheating but, cause sometimes
687    we think of DMDA has higher level than matrices.
688 
689      We could switch based on Atype (or mtype), but we do not since the
690    specialized setting routines depend only on the particular preallocation
691    details of the matrix, not the type itself.
692   */
693   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJSetPreallocation_C", &aij));
694   if (!aij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJSetPreallocation_C", &aij));
695   if (!aij) {
696     PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIBAIJSetPreallocation_C", &baij));
697     if (!baij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", &baij));
698     if (!baij) {
699       PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISBAIJSetPreallocation_C", &sbaij));
700       if (!sbaij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSBAIJSetPreallocation_C", &sbaij));
701       if (!sbaij) {
702         PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", &sell));
703         if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSELLSetPreallocation_C", &sell));
704       }
705       if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatISSetPreallocation_C", &is));
706     }
707   }
708   if (aij) {
709     if (dim == 1) {
710       if (dd->ofill) {
711         PetscCall(DMCreateMatrix_DA_1d_MPIAIJ_Fill(da, A));
712       } else {
713         DMBoundaryType bx;
714         PetscMPIInt    size;
715         PetscCall(DMDAGetInfo(da, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &bx, NULL, NULL, NULL));
716         PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));
717         if (size == 1 && bx == DM_BOUNDARY_NONE) {
718           PetscCall(DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da, A));
719         } else {
720           PetscCall(DMCreateMatrix_DA_1d_MPIAIJ(da, A));
721         }
722       }
723     } else if (dim == 2) {
724       if (dd->ofill) {
725         PetscCall(DMCreateMatrix_DA_2d_MPIAIJ_Fill(da, A));
726       } else {
727         PetscCall(DMCreateMatrix_DA_2d_MPIAIJ(da, A));
728       }
729     } else if (dim == 3) {
730       if (dd->ofill) {
731         PetscCall(DMCreateMatrix_DA_3d_MPIAIJ_Fill(da, A));
732       } else {
733         PetscCall(DMCreateMatrix_DA_3d_MPIAIJ(da, A));
734       }
735     }
736   } else if (baij) {
737     if (dim == 2) {
738       PetscCall(DMCreateMatrix_DA_2d_MPIBAIJ(da, A));
739     } else if (dim == 3) {
740       PetscCall(DMCreateMatrix_DA_3d_MPIBAIJ(da, A));
741     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
742   } else if (sbaij) {
743     if (dim == 2) {
744       PetscCall(DMCreateMatrix_DA_2d_MPISBAIJ(da, A));
745     } else if (dim == 3) {
746       PetscCall(DMCreateMatrix_DA_3d_MPISBAIJ(da, A));
747     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
748   } else if (sell) {
749     if (dim == 2) {
750       PetscCall(DMCreateMatrix_DA_2d_MPISELL(da, A));
751     } else if (dim == 3) {
752       PetscCall(DMCreateMatrix_DA_3d_MPISELL(da, A));
753     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
754   } else if (is) {
755     PetscCall(DMCreateMatrix_DA_IS(da, A));
756   } else {
757     ISLocalToGlobalMapping ltog;
758 
759     PetscCall(MatSetBlockSize(A, dof));
760     PetscCall(MatSetUp(A));
761     PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
762     PetscCall(MatSetLocalToGlobalMapping(A, ltog, ltog));
763   }
764   PetscCall(DMDAGetGhostCorners(da, &starts[0], &starts[1], &starts[2], &dims[0], &dims[1], &dims[2]));
765   PetscCall(MatSetStencil(A, dim, dims, starts, dof));
766   PetscCall(MatSetDM(A, da));
767   PetscCallMPI(MPI_Comm_size(comm, &size));
768   if (size > 1) {
769     /* change viewer to display matrix in natural ordering */
770     PetscCall(MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA));
771     PetscCall(MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA));
772   }
773   *J = A;
774   PetscFunctionReturn(PETSC_SUCCESS);
775 }
776 
777 /* ---------------------------------------------------------------------------------*/
778 PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]);
779 
/*
   Builds a MATIS matrix for a DMDA: constructs a local-to-global mapping restricted to
   the nodes actually touched by the local element decomposition, then preallocates the
   local matrix by replaying the element loop through a MATPREALLOCATOR.
*/
PetscErrorCode DMCreateMatrix_DA_IS(DM dm, Mat J)
{
  DM_DA                 *da = (DM_DA *)dm->data;
  Mat                    lJ, P;
  ISLocalToGlobalMapping ltog;
  IS                     is;
  PetscBT                bt;
  const PetscInt        *e_loc, *idx;
  PetscInt               i, nel, nen, nv, dof, *gidx, n, N;

  /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
     We need to filter out the local indices that are not represented through the DMDAGetElements decomposition */
  PetscFunctionBegin;
  dof = da->w;
  PetscCall(MatSetBlockSize(J, dof));
  PetscCall(DMGetLocalToGlobalMapping(dm, &ltog));

  /* flag local elements indices in local DMDA numbering */
  PetscCall(ISLocalToGlobalMappingGetSize(ltog, &nv));
  PetscCall(PetscBTCreate(nv / dof, &bt)); /* one bit per block (node), not per dof */
  PetscCall(DMDAGetElements(dm, &nel, &nen, &e_loc)); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
  for (i = 0; i < nel * nen; i++) PetscCall(PetscBTSet(bt, e_loc[i]));

  /* filter out (set to -1) the global indices not used by the local elements */
  PetscCall(PetscMalloc1(nv / dof, &gidx));
  PetscCall(ISLocalToGlobalMappingGetBlockIndices(ltog, &idx));
  PetscCall(PetscArraycpy(gidx, idx, nv / dof));
  PetscCall(ISLocalToGlobalMappingRestoreBlockIndices(ltog, &idx));
  for (i = 0; i < nv / dof; i++)
    if (!PetscBTLookup(bt, i)) gidx[i] = -1;
  PetscCall(PetscBTDestroy(&bt));
  /* the IS takes ownership of gidx (PETSC_OWN_POINTER); the restricted map replaces the DMDA map on J */
  PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)dm), dof, nv / dof, gidx, PETSC_OWN_POINTER, &is));
  PetscCall(ISLocalToGlobalMappingCreateIS(is, &ltog));
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
  PetscCall(ISLocalToGlobalMappingDestroy(&ltog)); /* destroy only the map created here, not the DMDA's */
  PetscCall(ISDestroy(&is));

  /* Preallocation */
  if (dm->prealloc_skip) {
    PetscCall(MatSetUp(J));
  } else {
    /* replay the element connectivity into a MATPREALLOCATOR of the same local sizes,
       then use it to preallocate (and, unless da->prealloc_only, zero-fill) lJ */
    PetscCall(MatISGetLocalMat(J, &lJ));
    PetscCall(MatGetLocalToGlobalMapping(lJ, &ltog, NULL)); /* borrowed reference; do not destroy */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)lJ), &P));
    PetscCall(MatSetType(P, MATPREALLOCATOR));
    PetscCall(MatSetLocalToGlobalMapping(P, ltog, ltog));
    PetscCall(MatGetSize(lJ, &N, NULL));
    PetscCall(MatGetLocalSize(lJ, &n, NULL));
    PetscCall(MatSetSizes(P, n, n, N, N));
    PetscCall(MatSetBlockSize(P, dof));
    PetscCall(MatSetUp(P));
    for (i = 0; i < nel; i++) PetscCall(MatSetValuesBlockedLocal(P, nen, e_loc + i * nen, nen, e_loc + i * nen, NULL, INSERT_VALUES));
    PetscCall(MatPreallocatorPreallocate(P, (PetscBool)!da->prealloc_only, lJ));
    PetscCall(MatISRestoreLocalMat(J, &lJ));
    PetscCall(DMDARestoreElements(dm, &nel, &nen, &e_loc));
    PetscCall(MatDestroy(&P));

    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
842 
843 PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da, Mat J)
844 {
845   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p;
846   PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
847   MPI_Comm               comm;
848   PetscScalar           *values;
849   DMBoundaryType         bx, by;
850   ISLocalToGlobalMapping ltog;
851   DMDAStencilType        st;
852 
853   PetscFunctionBegin;
854   /*
855          nc - number of components per grid point
856          col - number of colors needed in one direction for single component problem
857   */
858   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
859   col = 2 * s + 1;
860   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
861   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
862   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
863 
864   PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
865   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
866 
867   PetscCall(MatSetBlockSize(J, nc));
868   /* determine the matrix preallocation information */
869   MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
870   for (i = xs; i < xs + nx; i++) {
871     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
872     pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
873 
874     for (j = ys; j < ys + ny; j++) {
875       slot = i - gxs + gnx * (j - gys);
876 
877       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
878       lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
879 
880       cnt = 0;
881       for (k = 0; k < nc; k++) {
882         for (l = lstart; l < lend + 1; l++) {
883           for (p = pstart; p < pend + 1; p++) {
884             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
885               cols[cnt++] = k + nc * (slot + gnx * l + p);
886             }
887           }
888         }
889         rows[k] = k + nc * (slot);
890       }
891       PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
892     }
893   }
894   PetscCall(MatSetBlockSize(J, nc));
895   PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
896   PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
897   MatPreallocateEnd(dnz, onz);
898 
899   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
900 
901   /*
902     For each node in the grid: we get the neighbors in the local (on processor ordering
903     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
904     PETSc ordering.
905   */
906   if (!da->prealloc_only) {
907     PetscCall(PetscCalloc1(col * col * nc * nc, &values));
908     for (i = xs; i < xs + nx; i++) {
909       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
910       pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
911 
912       for (j = ys; j < ys + ny; j++) {
913         slot = i - gxs + gnx * (j - gys);
914 
915         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
916         lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
917 
918         cnt = 0;
919         for (k = 0; k < nc; k++) {
920           for (l = lstart; l < lend + 1; l++) {
921             for (p = pstart; p < pend + 1; p++) {
922               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
923                 cols[cnt++] = k + nc * (slot + gnx * l + p);
924               }
925             }
926           }
927           rows[k] = k + nc * (slot);
928         }
929         PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
930       }
931     }
932     PetscCall(PetscFree(values));
933     /* do not copy values to GPU since they are all zero and not yet needed there */
934     PetscCall(MatBindToCPU(J, PETSC_TRUE));
935     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
936     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
937     PetscCall(MatBindToCPU(J, PETSC_FALSE));
938     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
939   }
940   PetscCall(PetscFree2(rows, cols));
941   PetscFunctionReturn(PETSC_SUCCESS);
942 }
943 
944 PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da, Mat J)
945 {
946   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
947   PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
948   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
949   MPI_Comm               comm;
950   PetscScalar           *values;
951   DMBoundaryType         bx, by, bz;
952   ISLocalToGlobalMapping ltog;
953   DMDAStencilType        st;
954 
955   PetscFunctionBegin;
956   /*
957          nc - number of components per grid point
958          col - number of colors needed in one direction for single component problem
959   */
960   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
961   col = 2 * s + 1;
962   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
963   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
964   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
965 
966   PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
967   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
968 
969   PetscCall(MatSetBlockSize(J, nc));
970   /* determine the matrix preallocation information */
971   MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
972   for (i = xs; i < xs + nx; i++) {
973     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
974     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
975     for (j = ys; j < ys + ny; j++) {
976       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
977       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
978       for (k = zs; k < zs + nz; k++) {
979         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
980         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
981 
982         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
983 
984         cnt = 0;
985         for (l = 0; l < nc; l++) {
986           for (ii = istart; ii < iend + 1; ii++) {
987             for (jj = jstart; jj < jend + 1; jj++) {
988               for (kk = kstart; kk < kend + 1; kk++) {
989                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
990                   cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
991                 }
992               }
993             }
994           }
995           rows[l] = l + nc * (slot);
996         }
997         PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
998       }
999     }
1000   }
1001   PetscCall(MatSetBlockSize(J, nc));
1002   PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
1003   PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
1004   MatPreallocateEnd(dnz, onz);
1005   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1006 
1007   /*
1008     For each node in the grid: we get the neighbors in the local (on processor ordering
1009     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1010     PETSc ordering.
1011   */
1012   if (!da->prealloc_only) {
1013     PetscCall(PetscCalloc1(col * col * col * nc * nc * nc, &values));
1014     for (i = xs; i < xs + nx; i++) {
1015       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1016       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1017       for (j = ys; j < ys + ny; j++) {
1018         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1019         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1020         for (k = zs; k < zs + nz; k++) {
1021           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1022           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1023 
1024           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1025 
1026           cnt = 0;
1027           for (l = 0; l < nc; l++) {
1028             for (ii = istart; ii < iend + 1; ii++) {
1029               for (jj = jstart; jj < jend + 1; jj++) {
1030                 for (kk = kstart; kk < kend + 1; kk++) {
1031                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1032                     cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
1033                   }
1034                 }
1035               }
1036             }
1037             rows[l] = l + nc * (slot);
1038           }
1039           PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
1040         }
1041       }
1042     }
1043     PetscCall(PetscFree(values));
1044     /* do not copy values to GPU since they are all zero and not yet needed there */
1045     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1046     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1047     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1048     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1049     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1050   }
1051   PetscCall(PetscFree2(rows, cols));
1052   PetscFunctionReturn(PETSC_SUCCESS);
1053 }
1054 
/*
   Preallocates (and, unless da->prealloc_only, fills with zeros) an AIJ matrix for a
   2d DMDA.  One pass counts diagonal/off-diagonal nonzeros per row; a second pass
   inserts explicit zeros so the nonzero pattern is assembled.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, M, N;
  PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
  MPI_Comm               comm;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog, mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE, alreadyboundtocpu = PETSC_TRUE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
  col = 2 * s + 1;
  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* stencil extent in x, clipped at the physical boundary unless periodic */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      slot = i - gxs + gnx * (j - gys); /* local ghosted index of node (i,j) */

      /* stencil extent in y, clipped at the physical boundary unless periodic */
      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      cnt = 0;
      for (k = 0; k < nc; k++) {
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc * (slot + gnx * l + p);
            }
          }
        }
        rows[k] = k + nc * (slot);
      }
      if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  /* keep a mapping already attached to J (e.g. by a caller), otherwise install the DMDA's */
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        cnt = 0;
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              /* the nc components of one node occupy consecutive indices, so only the
                 first is computed and the rest follow by incrementing */
              cols[cnt++] = nc * (slot + gnx * l + p);
              for (k = 1; k < nc; k++) {
                cols[cnt] = 1 + cols[cnt - 1];
                cnt++;
              }
            }
          }
        }
        for (k = 0; k < nc; k++) rows[k] = k + nc * (slot);
        PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBoundToCPU(J, &alreadyboundtocpu));
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (!alreadyboundtocpu) PetscCall(MatBindToCPU(J, PETSC_FALSE)); /* restore previous binding state */
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1165 
1166 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da, Mat J)
1167 {
1168   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1169   PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, M, N;
1170   PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
1171   DM_DA                 *dd = (DM_DA *)da->data;
1172   PetscInt               ifill_col, *ofill = dd->ofill, *dfill = dd->dfill;
1173   MPI_Comm               comm;
1174   DMBoundaryType         bx, by;
1175   ISLocalToGlobalMapping ltog;
1176   DMDAStencilType        st;
1177   PetscBool              removedups = PETSC_FALSE;
1178 
1179   PetscFunctionBegin;
1180   /*
1181          nc - number of components per grid point
1182          col - number of colors needed in one direction for single component problem
1183   */
1184   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
1185   col = 2 * s + 1;
1186   /*
1187        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1188        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1189   */
1190   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1191   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1192   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1193   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1194   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1195 
1196   PetscCall(PetscMalloc1(col * col * nc, &cols));
1197   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1198 
1199   PetscCall(MatSetBlockSize(J, nc));
1200   /* determine the matrix preallocation information */
1201   MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
1202   for (i = xs; i < xs + nx; i++) {
1203     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1204     pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1205 
1206     for (j = ys; j < ys + ny; j++) {
1207       slot = i - gxs + gnx * (j - gys);
1208 
1209       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1210       lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1211 
1212       for (k = 0; k < nc; k++) {
1213         cnt = 0;
1214         for (l = lstart; l < lend + 1; l++) {
1215           for (p = pstart; p < pend + 1; p++) {
1216             if (l || p) {
1217               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
1218                 for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
1219               }
1220             } else {
1221               if (dfill) {
1222                 for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
1223               } else {
1224                 for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
1225               }
1226             }
1227           }
1228         }
1229         row    = k + nc * (slot);
1230         maxcnt = PetscMax(maxcnt, cnt);
1231         if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
1232         else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
1233       }
1234     }
1235   }
1236   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
1237   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
1238   MatPreallocateEnd(dnz, onz);
1239   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1240 
1241   /*
1242     For each node in the grid: we get the neighbors in the local (on processor ordering
1243     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1244     PETSc ordering.
1245   */
1246   if (!da->prealloc_only) {
1247     for (i = xs; i < xs + nx; i++) {
1248       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1249       pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1250 
1251       for (j = ys; j < ys + ny; j++) {
1252         slot = i - gxs + gnx * (j - gys);
1253 
1254         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1255         lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1256 
1257         for (k = 0; k < nc; k++) {
1258           cnt = 0;
1259           for (l = lstart; l < lend + 1; l++) {
1260             for (p = pstart; p < pend + 1; p++) {
1261               if (l || p) {
1262                 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
1263                   for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
1264                 }
1265               } else {
1266                 if (dfill) {
1267                   for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
1268                 } else {
1269                   for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
1270                 }
1271               }
1272             }
1273           }
1274           row = k + nc * (slot);
1275           PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1276         }
1277       }
1278     }
1279     /* do not copy values to GPU since they are all zero and not yet needed there */
1280     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1281     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1282     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1283     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1284     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1285   }
1286   PetscCall(PetscFree(cols));
1287   PetscFunctionReturn(PETSC_SUCCESS);
1288 }
1289 
1290 /* ---------------------------------------------------------------------------------*/
1291 
/*
   Preallocates (and, unless da->prealloc_only, fills with zeros) an AIJ matrix for a
   3d DMDA.  One pass counts diagonal/off-diagonal nonzeros per row; a second pass
   inserts explicit zeros so the nonzero pattern is assembled.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
  MPI_Comm               comm;
  DMBoundaryType         bx, by, bz;
  ISLocalToGlobalMapping ltog, mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
  col = 2 * s + 1;

  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* stencil extents below are clipped at the physical boundary unless periodic */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); /* local ghosted index of node (i,j,k) */

        cnt = 0;
        for (l = 0; l < nc; l++) {
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                }
              }
            }
          }
          rows[l] = l + nc * (slot);
        }
        if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
        else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  /* keep a mapping already attached to J (e.g. by a caller), otherwise install the DMDA's */
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          /* unlike the preallocation pass, kk is the outer and ii the inner loop here, so the
             local column indices (slot + ii + gnx*jj + gnx*gny*kk) are generated in ascending order */
          for (kk = kstart; kk < kend + 1; kk++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (ii = istart; ii < iend + 1; ii++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  /* the nc components of one node occupy consecutive indices, so only the
                     first is computed and the rest follow by incrementing */
                  cols[cnt++] = nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  for (l = 1; l < nc; l++) {
                    cols[cnt] = 1 + cols[cnt - 1];
                    cnt++;
                  }
                }
              }
            }
          }
          rows[0] = nc * (slot);
          for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
          PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
        }
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1416 
1417 /* ---------------------------------------------------------------------------------*/
1418 
/*
  DMCreateMatrix_DA_1d_MPIAIJ_Fill - preallocates (and, unless da->prealloc_only is set,
  fills with explicit zeros) an AIJ matrix for a 1d DMDA whose inter-component coupling
  was restricted with DMDASetBlockFills().

  dfill/ofill are in the modified CSR layout built by DMDASetBlockFills_Private():
  fill[j+1] - fill[j] is the number of coupled components for component j, and
  fill[k] for k in [fill[j], fill[j+1]) are those components' indices.
*/
PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da, Mat J)
{
  DM_DA                 *dd = (DM_DA *)da->data;
  PetscInt               xs, nx, i, j, gxs, gnx, row, k, l;
  PetscInt               m, dim, s, *cols = NULL, nc, cnt, maxcnt = 0, *ocols;
  PetscInt              *ofill = dd->ofill, *dfill = dd->dfill;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog;
  PetscMPIInt            rank, size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)da), &rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));

  /*
         nc - number of components per grid point
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  /* the index arithmetic below (left/right coupling, periodic wrap) is only valid for s <= 1 */
  PetscCheck(s <= 1, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Matrix creation for 1d not implemented correctly for stencil width larger than 1");
  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

  PetscCall(MatSetBlockSize(J, nc));
  /* cols[]/ocols[] hold the per-row diagonal- and off-diagonal-portion nonzero counts */
  PetscCall(PetscCalloc2(nx * nc, &cols, nx * nc, &ocols));

  /*
        note should be smaller for first and last process with no periodic
        does not handle dfill
  */
  cnt = 0;
  /* coupling with process to the left */
  for (i = 0; i < s; i++) {
    for (j = 0; j < nc; j++) {
      /* neighbors owned by the left process go in the off-diagonal count (none on rank 0) */
      ocols[cnt] = ((rank == 0) ? 0 : (s - i) * (ofill[j + 1] - ofill[j]));
      cols[cnt]  = dfill[j + 1] - dfill[j] + (s + i) * (ofill[j + 1] - ofill[j]);
      if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        /* periodic wrap: the "left" neighbor lives on the last process (on ourselves when serial) */
        if (size > 1) ocols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
        else cols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
      }
      maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
      cnt++;
    }
  }
  /* interior rows: the whole stencil is owned locally */
  for (i = s; i < nx - s; i++) {
    for (j = 0; j < nc; j++) {
      cols[cnt] = dfill[j + 1] - dfill[j] + 2 * s * (ofill[j + 1] - ofill[j]);
      maxcnt    = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
      cnt++;
    }
  }
  /* coupling with process to the right */
  for (i = nx - s; i < nx; i++) {
    for (j = 0; j < nc; j++) {
      ocols[cnt] = ((rank == (size - 1)) ? 0 : (i - nx + s + 1) * (ofill[j + 1] - ofill[j]));
      cols[cnt]  = dfill[j + 1] - dfill[j] + (s + nx - i - 1) * (ofill[j + 1] - ofill[j]);
      if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        /* periodic wrap: the "right" neighbor lives on process 0 (on ourselves when serial) */
        if (size > 1) ocols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
        else cols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
      }
      maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
      cnt++;
    }
  }

  PetscCall(MatSeqAIJSetPreallocation(J, 0, cols));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, cols, 0, ocols));
  PetscCall(PetscFree2(cols, ocols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* maxcnt is the widest row found above; reuse cols as the column-index scratch buffer */
    PetscCall(PetscMalloc1(maxcnt, &cols));
    row = xs * nc;
    /* coupling with process to the left */
    for (i = xs; i < xs + s; i++) {
      for (j = 0; j < nc; j++) {
        cnt = 0;
        if (rank) {
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
          }
        }
        if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
          /* wrapped-around left neighbor at the far end of the domain */
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (m + i - s - l) * nc + ofill[k];
          }
        }
        if (dfill) {
          /* diagonal block: only the components listed in dfill couple */
          for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
        } else {
          /* no dfill table: dense coupling within the grid point */
          for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
        }
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
        }
        /* global indices here, hence MatSetValues() rather than MatSetValuesLocal() */
        PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        row++;
      }
    }
    /* interior rows */
    for (i = xs + s; i < xs + nx - s; i++) {
      for (j = 0; j < nc; j++) {
        cnt = 0;
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
        }
        if (dfill) {
          for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
        } else {
          for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
        }
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
        }
        PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        row++;
      }
    }
    /* coupling with process to the right */
    for (i = xs + nx - s; i < xs + nx; i++) {
      for (j = 0; j < nc; j++) {
        cnt = 0;
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
        }
        if (dfill) {
          for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
        } else {
          for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
        }
        if (rank < size - 1) {
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
          }
        }
        if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
          /* NOTE(review): this wrap formula relies on s <= 1 (checked above); re-derive before lifting that restriction */
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s - l - m + 2) * nc + ofill[k];
          }
        }
        PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        row++;
      }
    }
    PetscCall(PetscFree(cols));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1578 
1579 /* ---------------------------------------------------------------------------------*/
1580 
1581 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da, Mat J)
1582 {
1583   PetscInt               xs, nx, i, i1, slot, gxs, gnx;
1584   PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1585   PetscInt               istart, iend;
1586   DMBoundaryType         bx;
1587   ISLocalToGlobalMapping ltog, mltog;
1588 
1589   PetscFunctionBegin;
1590   /*
1591          nc - number of components per grid point
1592          col - number of colors needed in one direction for single component problem
1593   */
1594   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1595   if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1596   col = 2 * s + 1;
1597 
1598   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1599   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1600 
1601   PetscCall(MatSetBlockSize(J, nc));
1602   PetscCall(MatSeqAIJSetPreallocation(J, col * nc, NULL));
1603   PetscCall(MatMPIAIJSetPreallocation(J, col * nc, NULL, col * nc, NULL));
1604 
1605   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1606   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1607   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1608 
1609   /*
1610     For each node in the grid: we get the neighbors in the local (on processor ordering
1611     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1612     PETSc ordering.
1613   */
1614   if (!da->prealloc_only) {
1615     PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
1616     for (i = xs; i < xs + nx; i++) {
1617       istart = PetscMax(-s, gxs - i);
1618       iend   = PetscMin(s, gxs + gnx - i - 1);
1619       slot   = i - gxs;
1620 
1621       cnt = 0;
1622       for (i1 = istart; i1 < iend + 1; i1++) {
1623         cols[cnt++] = nc * (slot + i1);
1624         for (l = 1; l < nc; l++) {
1625           cols[cnt] = 1 + cols[cnt - 1];
1626           cnt++;
1627         }
1628       }
1629       rows[0] = nc * (slot);
1630       for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1631       PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1632     }
1633     /* do not copy values to GPU since they are all zero and not yet needed there */
1634     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1635     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1636     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1637     if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1638     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1639     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1640     PetscCall(PetscFree2(rows, cols));
1641   }
1642   PetscFunctionReturn(PETSC_SUCCESS);
1643 }
1644 
1645 /* ---------------------------------------------------------------------------------*/
1646 
1647 PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da, Mat J)
1648 {
1649   PetscInt               xs, nx, i, i1, slot, gxs, gnx;
1650   PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1651   PetscInt               istart, iend;
1652   DMBoundaryType         bx;
1653   ISLocalToGlobalMapping ltog, mltog;
1654 
1655   PetscFunctionBegin;
1656   /*
1657          nc - number of components per grid point
1658          col - number of colors needed in one direction for single component problem
1659   */
1660   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1661   col = 2 * s + 1;
1662 
1663   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1664   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1665 
1666   PetscCall(MatSetBlockSize(J, nc));
1667   PetscCall(MatSeqAIJSetTotalPreallocation(J, nx * nc * col * nc));
1668 
1669   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1670   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1671   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1672 
1673   /*
1674     For each node in the grid: we get the neighbors in the local (on processor ordering
1675     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1676     PETSc ordering.
1677   */
1678   if (!da->prealloc_only) {
1679     PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
1680     for (i = xs; i < xs + nx; i++) {
1681       istart = PetscMax(-s, gxs - i);
1682       iend   = PetscMin(s, gxs + gnx - i - 1);
1683       slot   = i - gxs;
1684 
1685       cnt = 0;
1686       for (i1 = istart; i1 < iend + 1; i1++) {
1687         cols[cnt++] = nc * (slot + i1);
1688         for (l = 1; l < nc; l++) {
1689           cols[cnt] = 1 + cols[cnt - 1];
1690           cnt++;
1691         }
1692       }
1693       rows[0] = nc * (slot);
1694       for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1695       PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1696     }
1697     /* do not copy values to GPU since they are all zero and not yet needed there */
1698     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1699     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1700     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1701     if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1702     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1703     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1704     PetscCall(PetscFree2(rows, cols));
1705   }
1706   PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1707   PetscFunctionReturn(PETSC_SUCCESS);
1708 }
1709 
1710 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da, Mat J)
1711 {
1712   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1713   PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
1714   PetscInt               istart, iend, jstart, jend, ii, jj;
1715   MPI_Comm               comm;
1716   PetscScalar           *values;
1717   DMBoundaryType         bx, by;
1718   DMDAStencilType        st;
1719   ISLocalToGlobalMapping ltog;
1720 
1721   PetscFunctionBegin;
1722   /*
1723      nc - number of components per grid point
1724      col - number of colors needed in one direction for single component problem
1725   */
1726   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
1727   col = 2 * s + 1;
1728 
1729   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1730   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1731   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1732 
1733   PetscCall(PetscMalloc1(col * col * nc * nc, &cols));
1734 
1735   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1736 
1737   /* determine the matrix preallocation information */
1738   MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
1739   for (i = xs; i < xs + nx; i++) {
1740     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1741     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1742     for (j = ys; j < ys + ny; j++) {
1743       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1744       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1745       slot   = i - gxs + gnx * (j - gys);
1746 
1747       /* Find block columns in block row */
1748       cnt = 0;
1749       for (ii = istart; ii < iend + 1; ii++) {
1750         for (jj = jstart; jj < jend + 1; jj++) {
1751           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1752             cols[cnt++] = slot + ii + gnx * jj;
1753           }
1754         }
1755       }
1756       PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
1757     }
1758   }
1759   PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
1760   PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1761   MatPreallocateEnd(dnz, onz);
1762 
1763   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1764 
1765   /*
1766     For each node in the grid: we get the neighbors in the local (on processor ordering
1767     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1768     PETSc ordering.
1769   */
1770   if (!da->prealloc_only) {
1771     PetscCall(PetscCalloc1(col * col * nc * nc, &values));
1772     for (i = xs; i < xs + nx; i++) {
1773       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1774       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1775       for (j = ys; j < ys + ny; j++) {
1776         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1777         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1778         slot   = i - gxs + gnx * (j - gys);
1779         cnt    = 0;
1780         for (ii = istart; ii < iend + 1; ii++) {
1781           for (jj = jstart; jj < jend + 1; jj++) {
1782             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1783               cols[cnt++] = slot + ii + gnx * jj;
1784             }
1785           }
1786         }
1787         PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
1788       }
1789     }
1790     PetscCall(PetscFree(values));
1791     /* do not copy values to GPU since they are all zero and not yet needed there */
1792     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1793     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1794     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1795     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1796     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1797   }
1798   PetscCall(PetscFree(cols));
1799   PetscFunctionReturn(PETSC_SUCCESS);
1800 }
1801 
/*
  DMCreateMatrix_DA_3d_MPIBAIJ - preallocates a BAIJ matrix (block size nc) for a 3d
  DMDA and, unless da->prealloc_only is set, inserts an explicit zero block at every
  stencil location so later insertions hit preallocated slots.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* one entry per stencil point: these are block (node) indices, not scalar indices */
  PetscCall(PetscMalloc1(col * col * col, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* stencil extents, clipped at physical boundaries unless periodic */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        /* local (ghosted) block index of node (i,j,k) */
        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

        /* Find block columns in block row */
        cnt = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            for (kk = kstart; kk < kend + 1; kk++) {
              if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
              }
            }
          }
        }
        PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* zero buffer large enough for one full row of nc-by-nc blocks */
    PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
                }
              }
            }
          }
          PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1908 
1909 /*
1910   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
1911   identify in the local ordering with periodic domain.
1912 */
1913 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog, PetscInt *row, PetscInt *cnt, PetscInt col[])
1914 {
1915   PetscInt i, n;
1916 
1917   PetscFunctionBegin;
1918   PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, 1, row, row));
1919   PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, *cnt, col, col));
1920   for (i = 0, n = 0; i < *cnt; i++) {
1921     if (col[i] >= *row) col[n++] = col[i];
1922   }
1923   *cnt = n;
1924   PetscFunctionReturn(PETSC_SUCCESS);
1925 }
1926 
/*
  DMCreateMatrix_DA_2d_MPISBAIJ - preallocates a symmetric-block (SBAIJ) matrix for a
  2d DMDA and, unless da->prealloc_only is set, inserts explicit zero blocks.  Only the
  upper triangle is stored: L2GFilterUpperTriangular() converts the block row/columns
  to global indices and drops the below-diagonal columns, which is why the global-index
  routines (MatPreallocateSymmetricSetBlock / MatSetValuesBlocked) are used afterwards.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, ii, jj;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc1(col * col * nc * nc, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* stencil extents, clipped at physical boundaries unless periodic */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      slot   = i - gxs + gnx * (j - gys); /* local (ghosted) block index of node (i,j) */

      /* Find block columns in block row */
      cnt = 0;
      for (ii = istart; ii < iend + 1; ii++) {
        for (jj = jstart; jj < jend + 1; jj++) {
          if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
        }
      }
      /* after this call slot is the GLOBAL block row and cols[] the global upper-triangular columns */
      PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
      PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* zero buffer large enough for one full row of nc-by-nc blocks */
    PetscCall(PetscCalloc1(col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        slot   = i - gxs + gnx * (j - gys);

        /* Find block columns in block row */
        cnt = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
          }
        }
        /* global indices after filtering, hence MatSetValuesBlocked() (not the Local variant) */
        PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
        PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2018 
/*
  DMCreateMatrix_DA_3d_MPISBAIJ - preallocates a symmetric-block (SBAIJ) matrix for a
  3d DMDA and, unless da->prealloc_only is set, inserts explicit zero blocks.  Only the
  upper triangle is stored: L2GFilterUpperTriangular() converts the block row/columns
  to global indices and drops the below-diagonal columns, which is why the global-index
  routines (MatPreallocateSymmetricSetBlock / MatSetValuesBlocked) are used afterwards.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the matrix */
  PetscCall(PetscMalloc1(col * col * col, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* stencil extents, clipped at physical boundaries unless periodic */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        /* local (ghosted) block index of node (i,j,k) */
        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

        /* Find block columns in block row */
        cnt = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            for (kk = kstart; kk < kend + 1; kk++) {
              if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
            }
          }
        }
        /* after this call slot is the GLOBAL block row and cols[] the global upper-triangular columns */
        PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
        PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* zero buffer large enough for one full row of nc-by-nc blocks */
    PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
              }
            }
          }
          /* global indices after filtering, hence MatSetValuesBlocked() (not the Local variant) */
          PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
          PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2124 
2125 /* ---------------------------------------------------------------------------------*/
2126 
/*
  DMCreateMatrix_DA_3d_MPIAIJ_Fill - Preallocates (and, unless da->prealloc_only, zero-fills)
  an AIJ matrix for a 3d DMDA using the per-component coupling patterns the user supplied
  with DMDASetBlockFills().

  dd->dfill describes which components couple at the SAME grid point; dd->ofill describes
  which components couple to NEIGHBORING grid points.  Both are stored in the modified CSR
  layout built by DMDASetBlockFills_Private(): entries fill[l] .. fill[l+1]-1 are the column
  (component) indices of component row l.

  The routine makes two structurally identical sweeps over the locally owned grid points:
  the first counts per-row nonzeros to preallocate J; the second inserts explicit zeros so
  that every preallocated location exists in the assembled matrix, allowing
  MAT_NEW_NONZERO_LOCATION_ERR to be enabled afterwards.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
  DM_DA                 *dd = (DM_DA *)da->data;
  PetscInt               ifill_col, *dfill = dd->dfill, *ofill = dd->ofill;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;
  PetscCheck(bx != DM_BOUNDARY_PERIODIC || (m % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in X is divisible\n\
                 by 2*stencil_width + 1\n");
  PetscCheck(by != DM_BOUNDARY_PERIODIC || (n % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Y is divisible\n\
                 by 2*stencil_width + 1\n");
  PetscCheck(bz != DM_BOUNDARY_PERIODIC || (p % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Z is divisible\n\
                 by 2*stencil_width + 1\n");

  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* scratch for one matrix row's local column indices: at most nc entries for each of the col^3 stencil points */
  PetscCall(PetscMalloc1(col * col * col * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);

  PetscCall(MatSetBlockSize(J, nc));
  /* first sweep: count nonzeros per row (dnz = diagonal block, onz = off-diagonal block) */
  for (i = xs; i < xs + nx; i++) {
    /* clip the stencil offsets to the physical domain unless periodic in that direction */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        /* local (ghosted) index of grid point (i,j,k) */
        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

        /* one matrix row per component l at this grid point */
        for (l = 0; l < nc; l++) {
          cnt = 0;
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if (ii || jj || kk) {
                  /* off-point coupling: ofill row l lists the components of a neighbor that couple to component l */
                  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                    for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  }
                } else {
                  /* same-point coupling: use the dfill pattern if given, otherwise dense nc x nc coupling */
                  if (dfill) {
                    for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  } else {
                    for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  }
                }
              }
            }
          }
          row    = l + nc * (slot);
          /* remember the widest row so the zero-valued buffer in the fill pass below can be sized */
          maxcnt = PetscMax(maxcnt, cnt);
          if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
          else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
        }
      }
    }
  }
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* second sweep: insert explicit zeros at every location counted above */
    PetscCall(PetscCalloc1(maxcnt, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          for (l = 0; l < nc; l++) {
            /* rebuild the same column list as in the counting sweep */
            cnt = 0;
            for (ii = istart; ii < iend + 1; ii++) {
              for (jj = jstart; jj < jend + 1; jj++) {
                for (kk = kstart; kk < kend + 1; kk++) {
                  if (ii || jj || kk) {
                    if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                      for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    }
                  } else {
                    if (dfill) {
                      for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    } else {
                      for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    }
                  }
                }
              }
            }
            row = l + nc * (slot);
            PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, values, INSERT_VALUES));
          }
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    /* pattern is now fixed; any later insertion outside it is an error */
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2273