xref: /petsc/src/dm/impls/da/fdda.c (revision 4d86920da9ee67c835173a5dfffa1b3a52fd24ca)
1 #include <petsc/private/dmdaimpl.h> /*I      "petscdmda.h"     I*/
2 #include <petscmat.h>
3 #include <petscbt.h>
4 
5 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM, ISColoringType, ISColoring *);
6 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM, ISColoringType, ISColoring *);
7 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM, ISColoringType, ISColoring *);
8 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM, ISColoringType, ISColoring *);
9 
/*
   For a ghost index i that may be negative or greater than the upper bound m,
   this maps it into the 0:m-1 range using periodicity.

   Note: arguments are fully parenthesized so expression arguments expand
   correctly (e.g. SetInRange(i, 2 + 3)); arguments may still be evaluated
   more than once, so do not pass expressions with side effects.
*/
#define SetInRange(i, m) (((i) < 0) ? (m) + (i) : (((i) >= (m)) ? (i) - (m) : (i)))
15 
16 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill, PetscInt w, PetscInt **rfill)
17 {
18   PetscInt i, j, nz, *fill;
19 
20   PetscFunctionBegin;
21   if (!dfill) PetscFunctionReturn(PETSC_SUCCESS);
22 
23   /* count number nonzeros */
24   nz = 0;
25   for (i = 0; i < w; i++) {
26     for (j = 0; j < w; j++) {
27       if (dfill[w * i + j]) nz++;
28     }
29   }
30   PetscCall(PetscMalloc1(nz + w + 1, &fill));
31   /* construct modified CSR storage of nonzero structure */
32   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
33    so fill[1] - fill[0] gives number of nonzeros in first row etc */
34   nz = w + 1;
35   for (i = 0; i < w; i++) {
36     fill[i] = nz;
37     for (j = 0; j < w; j++) {
38       if (dfill[w * i + j]) {
39         fill[nz] = j;
40         nz++;
41       }
42     }
43   }
44   fill[w] = nz;
45 
46   *rfill = fill;
47   PetscFunctionReturn(PETSC_SUCCESS);
48 }
49 
50 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse, PetscInt w, PetscInt **rfill)
51 {
52   PetscInt nz;
53 
54   PetscFunctionBegin;
55   if (!dfillsparse) PetscFunctionReturn(PETSC_SUCCESS);
56 
57   /* Determine number of non-zeros */
58   nz = (dfillsparse[w] - w - 1);
59 
60   /* Allocate space for our copy of the given sparse matrix representation. */
61   PetscCall(PetscMalloc1(nz + w + 1, rfill));
62   PetscCall(PetscArraycpy(*rfill, dfillsparse, nz + w + 1));
63   PetscFunctionReturn(PETSC_SUCCESS);
64 }
65 
66 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
67 {
68   PetscInt i, k, cnt = 1;
69 
70   PetscFunctionBegin;
71   /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of
72    columns to the left with any nonzeros in them plus 1 */
73   PetscCall(PetscCalloc1(dd->w, &dd->ofillcols));
74   for (i = 0; i < dd->w; i++) {
75     for (k = dd->ofill[i]; k < dd->ofill[i + 1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
76   }
77   for (i = 0; i < dd->w; i++) {
78     if (dd->ofillcols[i]) dd->ofillcols[i] = cnt++;
79   }
80   PetscFunctionReturn(PETSC_SUCCESS);
81 }
82 
83 /*@
84   DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
85   of the matrix returned by `DMCreateMatrix()`.
86 
87   Logically Collective
88 
89   Input Parameters:
90 + da    - the distributed array
91 . dfill - the fill pattern in the diagonal block (may be `NULL`, means use dense block)
92 - ofill - the fill pattern in the off-diagonal blocks
93 
94   Level: developer
95 
96   Notes:
97   This only makes sense when you are doing multicomponent problems but using the
98   `MATMPIAIJ` matrix format
99 
100   The format for `dfill` and `ofill` is a 2 dimensional dof by dof matrix with 1 entries
101   representing coupling and 0 entries for missing coupling. For example
102 .vb
103             dfill[9] = {1, 0, 0,
104                         1, 1, 0,
105                         0, 1, 1}
106 .ve
107   means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
108   itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
109   diagonal block).
110 
  `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns than
  can be represented in the `dfill`, `ofill` format
113 
114   Contributed by\:
115   Glenn Hammond
116 
117 .seealso: [](sec_struct), `DM`, `DMDA`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`, `DMDASetBlockFillsSparse()`
118 @*/
PetscErrorCode DMDASetBlockFills(DM da, const PetscInt *dfill, const PetscInt *ofill)
{
  DM_DA *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /* convert the dense dof-by-dof dfill/ofill patterns to the compact CSR-like
     form and save them in the DMDA (a NULL pattern is left unset, meaning dense) */
  PetscCall(DMDASetBlockFills_Private(dfill, dd->w, &dd->dfill));
  PetscCall(DMDASetBlockFills_Private(ofill, dd->w, &dd->ofill));

  /* count nonzeros in ofill columns */
  PetscCall(DMDASetBlockFills_Private2(dd));
  PetscFunctionReturn(PETSC_SUCCESS);
}
132 
133 /*@
134   DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
135   of the matrix returned by `DMCreateMatrix()`, using sparse representations
136   of fill patterns.
137 
138   Logically Collective
139 
140   Input Parameters:
141 + da          - the distributed array
142 . dfillsparse - the sparse fill pattern in the diagonal block (may be `NULL`, means use dense block)
143 - ofillsparse - the sparse fill pattern in the off-diagonal blocks
144 
145   Level: developer
146 
147   Notes:
148   This only makes sense when you are doing multicomponent problems but using the
149   `MATMPIAIJ` matrix format
150 
151   The format for `dfill` and `ofill` is a sparse representation of a
152   dof-by-dof matrix with 1 entries representing coupling and 0 entries
153   for missing coupling.  The sparse representation is a 1 dimensional
154   array of length nz + dof + 1, where nz is the number of non-zeros in
155   the matrix.  The first dof entries in the array give the
156   starting array indices of each row's items in the rest of the array,
157   the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
158   and the remaining nz items give the column indices of each of
159   the 1s within the logical 2D matrix.  Each row's items within
160   the array are the column indices of the 1s within that row
161   of the 2D matrix.  PETSc developers may recognize that this is the
162   same format as that computed by the `DMDASetBlockFills_Private()`
163   function from a dense 2D matrix representation.
164 
  `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns than
  can be represented in the `dfill`, `ofill` format
167 
168   Contributed by\:
169   Philip C. Roth
170 
171 .seealso: [](sec_struct), `DM`, `DMDA`, `DMDASetBlockFills()`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
172 @*/
PetscErrorCode DMDASetBlockFillsSparse(DM da, const PetscInt *dfillsparse, const PetscInt *ofillsparse)
{
  DM_DA *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /* the patterns are already in the compact CSR-like form, so just copy them
     into the DMDA (a NULL pattern is left unset, meaning dense) */
  PetscCall(DMDASetBlockFillsSparse_Private(dfillsparse, dd->w, &dd->dfill));
  PetscCall(DMDASetBlockFillsSparse_Private(ofillsparse, dd->w, &dd->ofill));

  /* count nonzeros in ofill columns */
  PetscCall(DMDASetBlockFills_Private2(dd));
  PetscFunctionReturn(PETSC_SUCCESS);
}
186 
/* Dispatches coloring creation to the dimension-specific routines below. */
PetscErrorCode DMCreateColoring_DA(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt       dim, m, n, p, nc;
  DMBoundaryType bx, by, bz;
  MPI_Comm       comm;
  PetscMPIInt    size;
  PetscBool      isBAIJ;
  DM_DA         *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
                                  m
          ------------------------------------------------------
         |                                                     |
         |                                                     |
         |               ----------------------                |
         |               |                    |                |
      n  |           yn  |                    |                |
         |               |                    |                |
         |               .---------------------                |
         |             (xs,ys)     xn                          |
         |            .                                        |
         |         (gxs,gys)                                   |
         |                                                     |
          -----------------------------------------------------
  */

  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  PetscCall(DMDAGetInfo(da, &dim, NULL, NULL, NULL, &m, &n, &p, &nc, NULL, &bx, &by, &bz, NULL));

  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (ctype == IS_COLORING_LOCAL) {
    if (size == 1) {
      /* on one process the ghosted and global colorings coincide, so use the cheaper one */
      ctype = IS_COLORING_GLOBAL;
    } else {
      /* a periodic direction collapsed onto a single process would alias both ends of
         the domain in the ghosted coloring, which is not representable */
      PetscCheck((dim == 1) || !((m == 1 && bx == DM_BOUNDARY_PERIODIC) || (n == 1 && by == DM_BOUNDARY_PERIODIC) || (p == 1 && bz == DM_BOUNDARY_PERIODIC)), PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain on the same process");
    }
  }

  /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
     matrices is for the blocks, not the individual matrix elements  */
  PetscCall(PetscStrbeginswith(da->mattype, MATBAIJ, &isBAIJ));
  if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATMPIBAIJ, &isBAIJ));
  if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATSEQBAIJ, &isBAIJ));
  if (isBAIJ) {
    /* temporarily rescale the DMDA's per-process ranges to per-block units;
       restored after the dimension-specific call below.
       NOTE(review): assumes xs/xe/Xs/Xe are exact multiples of nc (dof-interlaced
       layout) -- confirm */
    dd->w  = 1;
    dd->xs = dd->xs / nc;
    dd->xe = dd->xe / nc;
    dd->Xs = dd->Xs / nc;
    dd->Xe = dd->Xe / nc;
  }

  /*
     We do not provide a getcoloring function in the DMDA operations because
   the basic DMDA does not know about matrices. We think of DMDA as being
   more low-level than matrices.
  */
  if (dim == 1) PetscCall(DMCreateColoring_DA_1d_MPIAIJ(da, ctype, coloring));
  else if (dim == 2) PetscCall(DMCreateColoring_DA_2d_MPIAIJ(da, ctype, coloring));
  else if (dim == 3) PetscCall(DMCreateColoring_DA_3d_MPIAIJ(da, ctype, coloring));
  else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not done for %" PetscInt_FMT " dimension, send us mail petsc-maint@mcs.anl.gov for code", dim);
  if (isBAIJ) {
    /* restore the per-point ranges modified above */
    dd->w  = nc;
    dd->xs = dd->xs * nc;
    dd->xe = dd->xe * nc;
    dd->Xs = dd->Xs * nc;
    dd->Xe = dd->Xe * nc;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
262 
/* Creates a coloring for a 2d DMDA using a (2s+1)^2 tiling of colors per component. */
PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, M, N, dim, s, k, nc, col;
  PetscInt         ncolors = 0;
  MPI_Comm         comm;
  DMBoundaryType   bx, by;
  DMDAStencilType  st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* special case as taught to us by Paul Hovland */
  if (st == DMDA_STENCIL_STAR && s == 1) {
    /* a 5-point star stencil needs only 5 colors per component, not 9 */
    PetscCall(DMCreateColoring_DA_2d_5pt_MPIAIJ(da, ctype, coloring));
  } else {
    if (ctype == IS_COLORING_GLOBAL) {
      /* color only the locally owned points; the result is cached in the DMDA */
      if (!dd->localcoloring) {
        PetscCall(PetscMalloc1(nc * nx * ny, &colors));
        ii = 0;
        for (j = ys; j < ys + ny; j++) {
          for (i = xs; i < xs + nx; i++) {
            /* color = component index + nc * (position within the col-by-col tile) */
            for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((i % col) + col * (j % col));
          }
        }
        ncolors = nc + nc * (col - 1 + col * (col - 1));
        PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
      }
      *coloring = dd->localcoloring;
    } else if (ctype == IS_COLORING_LOCAL) {
      /* color owned plus ghost points; cached separately from the global coloring */
      if (!dd->ghostedcoloring) {
        PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
        ii = 0;
        for (j = gys; j < gys + gny; j++) {
          for (i = gxs; i < gxs + gnx; i++) {
            for (k = 0; k < nc; k++) {
              /* the complicated stuff is to handle periodic boundaries */
              colors[ii++] = k + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col));
            }
          }
        }
        ncolors = nc + nc * (col - 1 + col * (col - 1));
        PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
        /* PetscIntView(ncolors,(PetscInt*)colors,0); */

        PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
      }
      *coloring = dd->ghostedcoloring;
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  }
  /* the caller receives its own reference to the cached coloring */
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(PETSC_SUCCESS);
}
326 
/* Creates a coloring for a 3d DMDA using a (2s+1)^3 tiling of colors per component. */
PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, gxs, gys, gnx, gny, m, n, p, dim, s, k, nc, col, zs, gzs, ii, l, nz, gnz, M, N, P;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx, by, bz;
  DMDAStencilType  st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;
  /* with periodicity, the modular tiling below only remains a valid coloring when
     the global grid size is a multiple of the tile width col */
  PetscCheck(bx != DM_BOUNDARY_PERIODIC || (m % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in X is divisible\n\
                 by 2*stencil_width + 1\n");
  PetscCheck(by != DM_BOUNDARY_PERIODIC || (n % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Y is divisible\n\
                 by 2*stencil_width + 1\n");
  PetscCheck(bz != DM_BOUNDARY_PERIODIC || (p % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Z is divisible\n\
                 by 2*stencil_width + 1\n");

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    /* color only the locally owned points; the result is cached in the DMDA */
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx * ny * nz, &colors));
      ii = 0;
      for (k = zs; k < zs + nz; k++) {
        for (j = ys; j < ys + ny; j++) {
          for (i = xs; i < xs + nx; i++) {
            /* color = component index + nc * (position within the col^3 tile) */
            for (l = 0; l < nc; l++) colors[ii++] = l + nc * ((i % col) + col * (j % col) + col * col * (k % col));
          }
        }
      }
      ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny * nz, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    /* color owned plus ghost points; cached separately from the global coloring */
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx * gny * gnz, &colors));
      ii = 0;
      for (k = gzs; k < gzs + gnz; k++) {
        for (j = gys; j < gys + gny; j++) {
          for (i = gxs; i < gxs + gnx; i++) {
            for (l = 0; l < nc; l++) {
              /* the complicated stuff is to handle periodic boundaries */
              colors[ii++] = l + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col) + col * col * (SetInRange(k, p) % col));
            }
          }
        }
      }
      ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny * gnz, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  /* the caller receives its own reference to the cached coloring */
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(PETSC_SUCCESS);
}
394 
/* Creates a coloring for a 1d DMDA; when block fill patterns were set via
   DMDASetBlockFills()/DMDASetBlockFillsSparse(), fewer colors are used. */
PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, nx, i, i1, gxs, gnx, l, m, M, dim, s, nc, col;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, &M, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    /* color only the locally owned points; the result is cached in the DMDA */
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx, &colors));
      if (dd->ofillcols) {
        PetscInt tc = 0;
        /* tc = number of components with any coupling in the off-diagonal fill pattern */
        for (i = 0; i < nc; i++) tc += (PetscInt)(dd->ofillcols[i] > 0);
        i1 = 0;
        for (i = xs; i < xs + nx; i++) {
          for (l = 0; l < nc; l++) {
            if (dd->ofillcols[l] && (i % col)) {
              /* component couples across blocks: give it a distinct color for each
                 nonzero stencil offset, using its rank among the coupled columns */
              colors[i1++] = nc - 1 + tc * ((i % col) - 1) + dd->ofillcols[l];
            } else {
              /* purely within-block coupling: the component index itself suffices */
              colors[i1++] = l;
            }
          }
        }
        ncolors = nc + 2 * s * tc;
      } else {
        /* dense blocks: standard tiling, one color per component per stencil offset */
        i1 = 0;
        for (i = xs; i < xs + nx; i++) {
          for (l = 0; l < nc; l++) colors[i1++] = l + nc * (i % col);
        }
        ncolors = nc + nc * (col - 1);
      }
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    /* color owned plus ghost points; cached separately from the global coloring */
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx, &colors));
      i1 = 0;
      for (i = gxs; i < gxs + gnx; i++) {
        for (l = 0; l < nc; l++) {
          /* the complicated stuff is to handle periodic boundaries */
          colors[i1++] = l + nc * (SetInRange(i, m) % col);
        }
      }
      ncolors = nc + nc * (col - 1);
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  /* the caller receives its own reference to the cached coloring */
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(PETSC_SUCCESS);
}
462 
/* Creates a 5-colors-per-component coloring for a 2d DMDA with a 5-point
   (star, width-1) stencil, using the diagonal tiling (3*j + i) mod 5.
   Note: unlike the other coloring routines this does not take an extra
   reference on the result; its only caller, DMCreateColoring_DA_2d_MPIAIJ,
   calls ISColoringReference() afterwards. */
PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, dim, s, k, nc;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx, by;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, NULL));
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    /* color only the locally owned points; the result is cached in the DMDA */
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx * ny, &colors));
      ii = 0;
      for (j = ys; j < ys + ny; j++) {
        for (i = xs; i < xs + nx; i++) {
          /* diagonal 5-coloring: neighbors of (i,j) in a 5-point stencil all differ mod 5 */
          for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * j + i) % 5);
        }
      }
      ncolors = 5 * nc;
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    /* color owned plus ghost points; cached separately from the global coloring */
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
      ii = 0;
      for (j = gys; j < gys + gny; j++) {
        for (i = gxs; i < gxs + gnx; i++) {
          /* SetInRange wraps ghost indices for periodic boundaries */
          for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * SetInRange(j, n) + SetInRange(i, m)) % 5);
        }
      }
      ncolors = 5 * nc;
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  PetscFunctionReturn(PETSC_SUCCESS);
}
512 
513 /* =========================================================================== */
514 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM, Mat);
515 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM, Mat);
516 extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM, Mat);
517 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM, Mat);
518 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM, Mat);
519 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM, Mat);
520 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM, Mat);
521 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM, Mat);
522 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM, Mat);
523 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM, Mat);
524 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM, Mat);
525 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM, Mat);
526 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM, Mat);
527 extern PetscErrorCode DMCreateMatrix_DA_IS(DM, Mat);
528 
/* Views a DMDA-generated matrix permuted into the natural (application) grid
   ordering rather than the PETSc parallel ordering. */
static PetscErrorCode MatView_MPI_DA(Mat A, PetscViewer viewer)
{
  DM                da;
  const char       *prefix;
  Mat               AA, Anatural;
  AO                ao;
  PetscInt          rstart, rend, *petsc, i;
  IS                is;
  MPI_Comm          comm;
  PetscViewerFormat format;
  PetscBool         flag;

  PetscFunctionBegin;
  /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
  PetscCall(PetscViewerGetFormat(viewer, &format));
  if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(MatGetDM(A, &da));
  PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");

  /* translate this process's owned row range from the application (natural)
     numbering to the PETSc numbering, to drive the permuting submatrix below */
  PetscCall(DMDAGetAO(da, &ao));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
  PetscCall(PetscMalloc1(rend - rstart, &petsc));
  for (i = rstart; i < rend; i++) petsc[i - rstart] = i;
  PetscCall(AOApplicationToPetsc(ao, rend - rstart, petsc));
  /* the IS takes ownership of petsc (PETSC_OWN_POINTER); no explicit free here */
  PetscCall(ISCreateGeneral(comm, rend - rstart, petsc, PETSC_OWN_POINTER, &is));

  /* call viewer on natural ordering */
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPISELL, &flag));
  if (flag) {
    /* NOTE(review): MPISELL is converted to AIJ before extracting the submatrix --
       presumably MatCreateSubMatrix() is unsupported for MPISELL; confirm */
    PetscCall(MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &AA));
    A = AA;
  }
  PetscCall(MatCreateSubMatrix(A, is, is, MAT_INITIAL_MATRIX, &Anatural));
  PetscCall(ISDestroy(&is));
  PetscCall(PetscObjectGetOptionsPrefix((PetscObject)A, &prefix));
  PetscCall(PetscObjectSetOptionsPrefix((PetscObject)Anatural, prefix));
  PetscCall(PetscObjectSetName((PetscObject)Anatural, ((PetscObject)A)->name));
  /* temporarily suppress the class/name/prefix banner so output shows only the matrix */
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
  PetscCall(MatView(Anatural, viewer));
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
  PetscCall(MatDestroy(&Anatural));
  if (flag) PetscCall(MatDestroy(&AA));
  PetscFunctionReturn(PETSC_SUCCESS);
}
575 
/* Loads a matrix stored in the natural (application) grid ordering and
   permutes it into the DMDA's PETSc parallel ordering, replacing A in place. */
static PetscErrorCode MatLoad_MPI_DA(Mat A, PetscViewer viewer)
{
  DM       da;
  Mat      Anatural, Aapp;
  AO       ao;
  PetscInt rstart, rend, *app, i, m, n, M, N;
  IS       is;
  MPI_Comm comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(MatGetDM(A, &da));
  PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");

  /* Load the matrix in natural ordering, matching A's type and layout */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Anatural));
  PetscCall(MatSetType(Anatural, ((PetscObject)A)->type_name));
  PetscCall(MatGetSize(A, &M, &N));
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(MatSetSizes(Anatural, m, n, M, N));
  PetscCall(MatLoad(Anatural, viewer));

  /* Map natural ordering to application ordering and create IS;
     the IS takes ownership of app (PETSC_OWN_POINTER), so no explicit free */
  PetscCall(DMDAGetAO(da, &ao));
  PetscCall(MatGetOwnershipRange(Anatural, &rstart, &rend));
  PetscCall(PetscMalloc1(rend - rstart, &app));
  for (i = rstart; i < rend; i++) app[i - rstart] = i;
  PetscCall(AOPetscToApplication(ao, rend - rstart, app));
  PetscCall(ISCreateGeneral(comm, rend - rstart, app, PETSC_OWN_POINTER, &is));

  /* Do permutation and replace header: A's contents become the permuted matrix
     while A's object identity (pointers held by callers) is preserved */
  PetscCall(MatCreateSubMatrix(Anatural, is, is, MAT_INITIAL_MATRIX, &Aapp));
  PetscCall(MatHeaderReplace(A, &Aapp));
  PetscCall(ISDestroy(&is));
  PetscCall(MatDestroy(&Anatural));
  PetscFunctionReturn(PETSC_SUCCESS);
}
613 
614 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
615 {
616   PetscInt    dim, dof, nx, ny, nz, dims[3], starts[3], M, N, P;
617   Mat         A;
618   MPI_Comm    comm;
619   MatType     Atype;
620   MatType     mtype;
621   PetscMPIInt size;
622   DM_DA      *dd    = (DM_DA *)da->data;
623   void (*aij)(void) = NULL, (*baij)(void) = NULL, (*sbaij)(void) = NULL, (*sell)(void) = NULL, (*is)(void) = NULL;
624 
625   PetscFunctionBegin;
626   PetscCall(MatInitializePackage());
627   mtype = da->mattype;
628 
629   /*
630                                   m
631           ------------------------------------------------------
632          |                                                     |
633          |                                                     |
634          |               ----------------------                |
635          |               |                    |                |
636       n  |           ny  |                    |                |
637          |               |                    |                |
638          |               .---------------------                |
639          |             (xs,ys)     nx                          |
640          |            .                                        |
641          |         (gxs,gys)                                   |
642          |                                                     |
643           -----------------------------------------------------
644   */
645 
646   /*
647          nc - number of components per grid point
648          col - number of colors needed in one direction for single component problem
649   */
650   M   = dd->M;
651   N   = dd->N;
652   P   = dd->P;
653   dim = da->dim;
654   dof = dd->w;
655   /* PetscCall(DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); */
656   PetscCall(DMDAGetCorners(da, NULL, NULL, NULL, &nx, &ny, &nz));
657   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
658   PetscCall(MatCreate(comm, &A));
659   PetscCall(MatSetSizes(A, dof * nx * ny * nz, dof * nx * ny * nz, dof * M * N * P, dof * M * N * P));
660   PetscCall(MatSetType(A, mtype));
661   PetscCall(MatSetFromOptions(A));
662   if (dof * nx * ny * nz < da->bind_below) {
663     PetscCall(MatSetBindingPropagates(A, PETSC_TRUE));
664     PetscCall(MatBindToCPU(A, PETSC_TRUE));
665   }
666   PetscCall(MatSetDM(A, da));
667   if (da->structure_only) PetscCall(MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE));
668   PetscCall(MatGetType(A, &Atype));
669   /*
670      We do not provide a getmatrix function in the DMDA operations because
671    the basic DMDA does not know about matrices. We think of DMDA as being more
672    more low-level than matrices. This is kind of cheating but, cause sometimes
673    we think of DMDA has higher level than matrices.
674 
675      We could switch based on Atype (or mtype), but we do not since the
676    specialized setting routines depend only on the particular preallocation
677    details of the matrix, not the type itself.
678   */
679   if (!da->prealloc_skip) { // Flag is likely set when user intends to use MatSetPreallocationCOO()
680     PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJSetPreallocation_C", &aij));
681     if (!aij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJSetPreallocation_C", &aij));
682     if (!aij) {
683       PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIBAIJSetPreallocation_C", &baij));
684       if (!baij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", &baij));
685       if (!baij) {
686         PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISBAIJSetPreallocation_C", &sbaij));
687         if (!sbaij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSBAIJSetPreallocation_C", &sbaij));
688         if (!sbaij) {
689           PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", &sell));
690           if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSELLSetPreallocation_C", &sell));
691         }
692         if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatISSetPreallocation_C", &is));
693       }
694     }
695   }
696   if (aij) {
697     if (dim == 1) {
698       if (dd->ofill) {
699         PetscCall(DMCreateMatrix_DA_1d_MPIAIJ_Fill(da, A));
700       } else {
701         DMBoundaryType bx;
702         PetscMPIInt    size;
703         PetscCall(DMDAGetInfo(da, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &bx, NULL, NULL, NULL));
704         PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));
705         if (size == 1 && bx == DM_BOUNDARY_NONE) {
706           PetscCall(DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da, A));
707         } else {
708           PetscCall(DMCreateMatrix_DA_1d_MPIAIJ(da, A));
709         }
710       }
711     } else if (dim == 2) {
712       if (dd->ofill) {
713         PetscCall(DMCreateMatrix_DA_2d_MPIAIJ_Fill(da, A));
714       } else {
715         PetscCall(DMCreateMatrix_DA_2d_MPIAIJ(da, A));
716       }
717     } else if (dim == 3) {
718       if (dd->ofill) {
719         PetscCall(DMCreateMatrix_DA_3d_MPIAIJ_Fill(da, A));
720       } else {
721         PetscCall(DMCreateMatrix_DA_3d_MPIAIJ(da, A));
722       }
723     }
724   } else if (baij) {
725     if (dim == 2) {
726       PetscCall(DMCreateMatrix_DA_2d_MPIBAIJ(da, A));
727     } else if (dim == 3) {
728       PetscCall(DMCreateMatrix_DA_3d_MPIBAIJ(da, A));
729     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
730   } else if (sbaij) {
731     if (dim == 2) {
732       PetscCall(DMCreateMatrix_DA_2d_MPISBAIJ(da, A));
733     } else if (dim == 3) {
734       PetscCall(DMCreateMatrix_DA_3d_MPISBAIJ(da, A));
735     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
736   } else if (sell) {
737     if (dim == 2) {
738       PetscCall(DMCreateMatrix_DA_2d_MPISELL(da, A));
739     } else if (dim == 3) {
740       PetscCall(DMCreateMatrix_DA_3d_MPISELL(da, A));
741     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
742   } else if (is) {
743     PetscCall(DMCreateMatrix_DA_IS(da, A));
744   } else { // unknown type or da->prealloc_skip so structural information may be needed, but no prealloc
745     ISLocalToGlobalMapping ltog;
746 
747     PetscCall(MatSetBlockSize(A, dof));
748     PetscCall(MatSetUp(A));
749     PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
750     PetscCall(MatSetLocalToGlobalMapping(A, ltog, ltog));
751   }
752   PetscCall(DMDAGetGhostCorners(da, &starts[0], &starts[1], &starts[2], &dims[0], &dims[1], &dims[2]));
753   PetscCall(MatSetStencil(A, dim, dims, starts, dof));
754   PetscCall(MatSetDM(A, da));
755   PetscCallMPI(MPI_Comm_size(comm, &size));
756   if (size > 1) {
757     /* change viewer to display matrix in natural ordering */
758     PetscCall(MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA));
759     PetscCall(MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA));
760   }
761   *J = A;
762   PetscFunctionReturn(PETSC_SUCCESS);
763 }
764 
765 PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]);
766 
767 PetscErrorCode DMCreateMatrix_DA_IS(DM dm, Mat J)
768 {
769   DM_DA                 *da = (DM_DA *)dm->data;
770   Mat                    lJ, P;
771   ISLocalToGlobalMapping ltog;
772   IS                     is;
773   PetscBT                bt;
774   const PetscInt        *e_loc, *idx;
775   PetscInt               i, nel, nen, nv, dof, *gidx, n, N;
776 
777   /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
778      We need to filter out the local indices that are not represented through the DMDAGetElements decomposition */
779   PetscFunctionBegin;
780   dof = da->w;
781   PetscCall(MatSetBlockSize(J, dof));
782   PetscCall(DMGetLocalToGlobalMapping(dm, &ltog));
783 
784   /* flag local elements indices in local DMDA numbering */
785   PetscCall(ISLocalToGlobalMappingGetSize(ltog, &nv));
786   PetscCall(PetscBTCreate(nv / dof, &bt));
787   PetscCall(DMDAGetElements(dm, &nel, &nen, &e_loc)); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
788   for (i = 0; i < nel * nen; i++) PetscCall(PetscBTSet(bt, e_loc[i]));
789 
790   /* filter out (set to -1) the global indices not used by the local elements */
791   PetscCall(PetscMalloc1(nv / dof, &gidx));
792   PetscCall(ISLocalToGlobalMappingGetBlockIndices(ltog, &idx));
793   PetscCall(PetscArraycpy(gidx, idx, nv / dof));
794   PetscCall(ISLocalToGlobalMappingRestoreBlockIndices(ltog, &idx));
795   for (i = 0; i < nv / dof; i++)
796     if (!PetscBTLookup(bt, i)) gidx[i] = -1;
797   PetscCall(PetscBTDestroy(&bt));
798   PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)dm), dof, nv / dof, gidx, PETSC_OWN_POINTER, &is));
799   PetscCall(ISLocalToGlobalMappingCreateIS(is, &ltog));
800   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
801   PetscCall(ISLocalToGlobalMappingDestroy(&ltog));
802   PetscCall(ISDestroy(&is));
803 
804   /* Preallocation */
805   if (dm->prealloc_skip) {
806     PetscCall(MatSetUp(J));
807   } else {
808     PetscCall(MatISGetLocalMat(J, &lJ));
809     PetscCall(MatGetLocalToGlobalMapping(lJ, &ltog, NULL));
810     PetscCall(MatCreate(PetscObjectComm((PetscObject)lJ), &P));
811     PetscCall(MatSetType(P, MATPREALLOCATOR));
812     PetscCall(MatSetLocalToGlobalMapping(P, ltog, ltog));
813     PetscCall(MatGetSize(lJ, &N, NULL));
814     PetscCall(MatGetLocalSize(lJ, &n, NULL));
815     PetscCall(MatSetSizes(P, n, n, N, N));
816     PetscCall(MatSetBlockSize(P, dof));
817     PetscCall(MatSetUp(P));
818     for (i = 0; i < nel; i++) PetscCall(MatSetValuesBlockedLocal(P, nen, e_loc + i * nen, nen, e_loc + i * nen, NULL, INSERT_VALUES));
819     PetscCall(MatPreallocatorPreallocate(P, (PetscBool)!da->prealloc_only, lJ));
820     PetscCall(MatISRestoreLocalMat(J, &lJ));
821     PetscCall(DMDARestoreElements(dm, &nel, &nen, &e_loc));
822     PetscCall(MatDestroy(&P));
823 
824     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
825     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
826   }
827   PetscFunctionReturn(PETSC_SUCCESS);
828 }
829 
/*
  DMCreateMatrix_DA_2d_MPISELL - Preallocates (and, unless da->prealloc_only,
  fills with zeros) a SELL-format matrix whose sparsity is the (2*s+1)^2 box
  (or star) stencil of a 2d DMDA with nc dofs per grid point.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p;
  PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1; /* stencil width in one direction */
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* rows: nc row indices per grid point; cols: worst-case col*col stencil points x nc dofs,
     collected for all nc rows at once (hence the extra factor of nc) */
  PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* stencil extent in x: full [-s,s] if periodic, otherwise clamped at the physical boundary */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      /* local (ghosted) natural index of grid point (i,j) */
      slot = i - gxs + gnx * (j - gys);

      /* stencil extent in y, clamped like the x direction */
      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      cnt = 0;
      for (k = 0; k < nc; k++) {
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc * (slot + gnx * l + p);
            }
          }
        }
        rows[k] = k + nc * (slot);
      }
      PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  /* only the call matching J's actual type takes effect; the other is a no-op */
  PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
  PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* insert explicit zeros at every preallocated location so the nonzero
       structure is fixed before MAT_NEW_NONZERO_LOCATION_ERR is set below */
    PetscCall(PetscCalloc1(col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        cnt = 0;
        for (k = 0; k < nc; k++) {
          for (l = lstart; l < lend + 1; l++) {
            for (p = pstart; p < pend + 1; p++) {
              if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
                cols[cnt++] = k + nc * (slot + gnx * l + p);
              }
            }
          }
          rows[k] = k + nc * (slot);
        }
        PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
930 
/*
  DMCreateMatrix_DA_3d_MPISELL - Preallocates (and, unless da->prealloc_only,
  fills with zeros) a SELL-format matrix whose sparsity is the (2*s+1)^3 box
  (or star) stencil of a 3d DMDA with nc dofs per grid point.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1; /* stencil width in one direction */
  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* rows: nc row indices per grid point; cols sized for the worst-case box stencil */
  PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* stencil extents per direction: full [-s,s] if periodic, else clamped at the boundary */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        /* local (ghosted) natural index of grid point (i,j,k) */
        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

        cnt = 0;
        for (l = 0; l < nc; l++) {
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                }
              }
            }
          }
          rows[l] = l + nc * (slot);
        }
        PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  /* only the call matching J's actual type takes effect; the other is a no-op */
  PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
  PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* insert explicit zeros at every preallocated location; the buffer only needs
       nc*cnt entries per call, so this allocation (extra factor of nc) is generous */
    PetscCall(PetscCalloc1(col * col * col * nc * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          for (l = 0; l < nc; l++) {
            for (ii = istart; ii < iend + 1; ii++) {
              for (jj = jstart; jj < jend + 1; jj++) {
                for (kk = kstart; kk < kend + 1; kk++) {
                  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                    cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  }
                }
              }
            }
            rows[l] = l + nc * (slot);
          }
          PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1041 
/*
  DMCreateMatrix_DA_2d_MPIAIJ - Preallocates (and, unless da->prealloc_only,
  fills with zeros) an AIJ matrix whose sparsity is the (2*s+1)^2 box (or star)
  stencil of a 2d DMDA with nc dofs per grid point.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, M, N;
  PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
  MPI_Comm               comm;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog, mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE, alreadyboundtocpu = PETSC_TRUE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  /* without periodicity the columns generated below are in ascending order, so
     tell the matrix so insertions can take the fast path */
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
  col = 2 * s + 1;
  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* rows: nc row indices per grid point; cols: worst-case col*col stencil x nc dofs for all nc rows */
  PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* stencil extent in x: full [-s,s] if periodic, else clamped at the boundary */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      /* local (ghosted) natural index of grid point (i,j) */
      slot = i - gxs + gnx * (j - gys);

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      cnt = 0;
      for (k = 0; k < nc; k++) {
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc * (slot + gnx * l + p);
            }
          }
        }
        rows[k] = k + nc * (slot);
      }
      if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  /* only the call matching J's actual type takes effect; the other is a no-op */
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  /* respect a mapping the matrix may already carry; only install ours if none is set */
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* insert (NULL-valued, i.e. zero) entries at every preallocated location so the
       nonzero structure is fixed before MAT_NEW_NONZERO_LOCATION_ERR is set below */
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        cnt = 0;
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              /* the nc dofs of a stencil point are consecutive; emit them by increment */
              cols[cnt++] = nc * (slot + gnx * l + p);
              for (k = 1; k < nc; k++) {
                cols[cnt] = 1 + cols[cnt - 1];
                cnt++;
              }
            }
          }
        }
        for (k = 0; k < nc; k++) rows[k] = k + nc * (slot);
        PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBoundToCPU(J, &alreadyboundtocpu));
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (!alreadyboundtocpu) PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1152 
/*
  DMCreateMatrix_DA_2d_MPIAIJ_Fill - Like DMCreateMatrix_DA_2d_MPIAIJ, but the
  per-stencil-point coupling between the nc components is restricted by the
  user-provided fill patterns: dd->dfill for the diagonal (same grid point,
  l == 0 && p == 0) block and dd->ofill for off-diagonal (neighbor) blocks.
  Both are stored in the compressed row form built by DMDASetBlockFills_Private:
  entries [0..nc] are row starts, followed by the column indices.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, M, N;
  PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
  DM_DA                 *dd = (DM_DA *)da->data;
  PetscInt               ifill_col, *ofill = dd->ofill, *dfill = dd->dfill;
  MPI_Comm               comm;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;
  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* one row is processed at a time here, so cols only needs col*col stencil points x nc dofs */
  PetscCall(PetscMalloc1(col * col * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* stencil extent in x: full [-s,s] if periodic, else clamped at the boundary */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      /* local (ghosted) natural index of grid point (i,j) */
      slot = i - gxs + gnx * (j - gys);

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      for (k = 0; k < nc; k++) {
        cnt = 0;
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if (l || p) {
              /* neighbor grid point: couple component k only to the components listed in ofill row k */
              if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
                for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
              }
            } else {
              /* same grid point: use dfill row k if given, otherwise dense coupling of all nc components */
              if (dfill) {
                for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
              } else {
                for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
              }
            }
          }
        }
        row    = k + nc * (slot);
        maxcnt = PetscMax(maxcnt, cnt);
        if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
        else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  /* only the call matching J's actual type takes effect; the other is a no-op */
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* insert (NULL-valued, i.e. zero) entries at every preallocated location so the
       nonzero structure is fixed before MAT_NEW_NONZERO_LOCATION_ERR is set below */
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        for (k = 0; k < nc; k++) {
          cnt = 0;
          for (l = lstart; l < lend + 1; l++) {
            for (p = pstart; p < pend + 1; p++) {
              if (l || p) {
                if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
                  for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
                }
              } else {
                if (dfill) {
                  for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
                } else {
                  for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
                }
              }
            }
          }
          row = k + nc * (slot);
          PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        }
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1276 
/*
  DMCreateMatrix_DA_3d_MPIAIJ - Preallocates (and, unless da->prealloc_only,
  fills with zeros) an AIJ matrix whose sparsity is the (2*s+1)^3 box (or star)
  stencil of a 3d DMDA with nc dofs per grid point.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
  MPI_Comm               comm;
  DMBoundaryType         bx, by, bz;
  ISLocalToGlobalMapping ltog, mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  /* without periodicity the insertion loops below emit columns in ascending order
     (kk outermost down to the component index innermost), so enable the fast path */
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
  col = 2 * s + 1;

  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* rows: nc row indices per grid point; cols sized for the worst-case box stencil */
  PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* stencil extents per direction: full [-s,s] if periodic, else clamped at the boundary */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        /* local (ghosted) natural index of grid point (i,j,k) */
        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

        cnt = 0;
        for (l = 0; l < nc; l++) {
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                }
              }
            }
          }
          rows[l] = l + nc * (slot);
        }
        if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
        else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  /* only the call matching J's actual type takes effect; the other is a no-op */
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  /* respect a mapping the matrix may already carry; only install ours if none is set */
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* insert (NULL-valued, i.e. zero) entries at every preallocated location so the
       nonzero structure is fixed before MAT_NEW_NONZERO_LOCATION_ERR is set below;
       note the loop nest order (kk, jj, ii, then component) yields ascending columns */
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          for (kk = kstart; kk < kend + 1; kk++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (ii = istart; ii < iend + 1; ii++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  /* the nc dofs of a stencil point are consecutive; emit them by increment */
                  cols[cnt++] = nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  for (l = 1; l < nc; l++) {
                    cols[cnt] = 1 + cols[cnt - 1];
                    cnt++;
                  }
                }
              }
            }
          }
          rows[0] = nc * (slot);
          for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
          PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
        }
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1401 
/*
  Builds the nonzero structure of an (MPI)AIJ matrix for a 1d DMDA honoring the
  user-provided block fill patterns: dd->dfill describes coupling among the nc
  components at a single grid point, dd->ofill coupling with components at
  neighboring grid points.  Both are in the modified CSR layout produced by
  DMDASetBlockFills_Private(): fill[j] .. fill[j+1] delimit the column entries
  for component row j.  First pass computes per-row diagonal (cols[]) and
  off-diagonal (ocols[]) counts for preallocation; second pass (skipped when
  da->prealloc_only) inserts explicit zeros to lock in the pattern.
*/
PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da, Mat J)
{
  DM_DA                 *dd = (DM_DA *)da->data;
  PetscInt               xs, nx, i, j, gxs, gnx, row, k, l;
  PetscInt               m, dim, s, *cols = NULL, nc, cnt, maxcnt = 0, *ocols;
  PetscInt              *ofill = dd->ofill, *dfill = dd->dfill;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog;
  PetscMPIInt            rank, size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)da), &rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));

  /*
         nc - number of components per grid point
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  PetscCheck(s <= 1, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Matrix creation for 1d not implemented correctly for stencil width larger than 1");
  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

  PetscCall(MatSetBlockSize(J, nc));
  /* cols[] accumulates diagonal-portion counts per matrix row, ocols[] off-diagonal-portion counts */
  PetscCall(PetscCalloc2(nx * nc, &cols, nx * nc, &ocols));

  /*
        note should be smaller for first and last process with no periodic
        does not handle dfill
  */
  cnt = 0;
  /* coupling with process to the left */
  for (i = 0; i < s; i++) {
    for (j = 0; j < nc; j++) {
      /* ofill[j+1] - ofill[j] = number of neighbor-point couplings for component j */
      ocols[cnt] = ((rank == 0) ? 0 : (s - i) * (ofill[j + 1] - ofill[j]));
      cols[cnt]  = dfill[j + 1] - dfill[j] + (s + i) * (ofill[j + 1] - ofill[j]);
      if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        /* periodic wrap: left neighbor lives on another process (off-diagonal) unless uniprocessor */
        if (size > 1) ocols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
        else cols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
      }
      maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
      cnt++;
    }
  }
  /* interior points: full stencil of 2*s neighbor points plus the diagonal block */
  for (i = s; i < nx - s; i++) {
    for (j = 0; j < nc; j++) {
      cols[cnt] = dfill[j + 1] - dfill[j] + 2 * s * (ofill[j + 1] - ofill[j]);
      maxcnt    = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
      cnt++;
    }
  }
  /* coupling with process to the right */
  for (i = nx - s; i < nx; i++) {
    for (j = 0; j < nc; j++) {
      ocols[cnt] = ((rank == (size - 1)) ? 0 : (i - nx + s + 1) * (ofill[j + 1] - ofill[j]));
      cols[cnt]  = dfill[j + 1] - dfill[j] + (s + nx - i - 1) * (ofill[j + 1] - ofill[j]);
      if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        /* periodic wrap: right neighbor lives on another process (off-diagonal) unless uniprocessor */
        if (size > 1) ocols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
        else cols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
      }
      maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
      cnt++;
    }
  }

  PetscCall(MatSeqAIJSetPreallocation(J, 0, cols));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, cols, 0, ocols));
  PetscCall(PetscFree2(cols, ocols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* cols is reused here to hold global column indices for one matrix row; maxcnt bounds its length */
    PetscCall(PetscMalloc1(maxcnt, &cols));
    row = xs * nc;
    /* coupling with process to the left */
    for (i = xs; i < xs + s; i++) {
      for (j = 0; j < nc; j++) {
        cnt = 0;
        if (rank) {
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
          }
        }
        if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
          /* wrapped-around columns at the far end of the periodic domain */
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (m + i - s - l) * nc + ofill[k];
          }
        }
        if (dfill) {
          for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
        } else {
          /* no diagonal fill pattern supplied: couple component j to all nc components at this point */
          for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
        }
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
        }
        PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        row++;
      }
    }
    /* interior rows: left neighbors, diagonal block, right neighbors */
    for (i = xs + s; i < xs + nx - s; i++) {
      for (j = 0; j < nc; j++) {
        cnt = 0;
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
        }
        if (dfill) {
          for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
        } else {
          for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
        }
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
        }
        PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        row++;
      }
    }
    /* coupling with process to the right */
    for (i = xs + nx - s; i < xs + nx; i++) {
      for (j = 0; j < nc; j++) {
        cnt = 0;
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
        }
        if (dfill) {
          for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
        } else {
          for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
        }
        if (rank < size - 1) {
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
          }
        }
        if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
          /* wrapped-around columns at the start of the periodic domain */
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s - l - m + 2) * nc + ofill[k];
          }
        }
        PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        row++;
      }
    }
    PetscCall(PetscFree(cols));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1561 
1562 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da, Mat J)
1563 {
1564   PetscInt               xs, nx, i, i1, slot, gxs, gnx;
1565   PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1566   PetscInt               istart, iend;
1567   DMBoundaryType         bx;
1568   ISLocalToGlobalMapping ltog, mltog;
1569 
1570   PetscFunctionBegin;
1571   /*
1572          nc - number of components per grid point
1573          col - number of colors needed in one direction for single component problem
1574   */
1575   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1576   if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1577   col = 2 * s + 1;
1578 
1579   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1580   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1581 
1582   PetscCall(MatSetBlockSize(J, nc));
1583   PetscCall(MatSeqAIJSetPreallocation(J, col * nc, NULL));
1584   PetscCall(MatMPIAIJSetPreallocation(J, col * nc, NULL, col * nc, NULL));
1585 
1586   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1587   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1588   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1589 
1590   /*
1591     For each node in the grid: we get the neighbors in the local (on processor ordering
1592     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1593     PETSc ordering.
1594   */
1595   if (!da->prealloc_only) {
1596     PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
1597     for (i = xs; i < xs + nx; i++) {
1598       istart = PetscMax(-s, gxs - i);
1599       iend   = PetscMin(s, gxs + gnx - i - 1);
1600       slot   = i - gxs;
1601 
1602       cnt = 0;
1603       for (i1 = istart; i1 < iend + 1; i1++) {
1604         cols[cnt++] = nc * (slot + i1);
1605         for (l = 1; l < nc; l++) {
1606           cols[cnt] = 1 + cols[cnt - 1];
1607           cnt++;
1608         }
1609       }
1610       rows[0] = nc * (slot);
1611       for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1612       PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1613     }
1614     /* do not copy values to GPU since they are all zero and not yet needed there */
1615     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1616     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1617     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1618     if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1619     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1620     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1621     PetscCall(PetscFree2(rows, cols));
1622   }
1623   PetscFunctionReturn(PETSC_SUCCESS);
1624 }
1625 
1626 PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da, Mat J)
1627 {
1628   PetscInt               xs, nx, i, i1, slot, gxs, gnx;
1629   PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1630   PetscInt               istart, iend;
1631   DMBoundaryType         bx;
1632   ISLocalToGlobalMapping ltog, mltog;
1633 
1634   PetscFunctionBegin;
1635   /*
1636          nc - number of components per grid point
1637          col - number of colors needed in one direction for single component problem
1638   */
1639   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1640   col = 2 * s + 1;
1641 
1642   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1643   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1644 
1645   PetscCall(MatSetBlockSize(J, nc));
1646   PetscCall(MatSeqAIJSetTotalPreallocation(J, nx * nc * col * nc));
1647 
1648   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1649   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1650   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1651 
1652   /*
1653     For each node in the grid: we get the neighbors in the local (on processor ordering
1654     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1655     PETSc ordering.
1656   */
1657   if (!da->prealloc_only) {
1658     PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
1659     for (i = xs; i < xs + nx; i++) {
1660       istart = PetscMax(-s, gxs - i);
1661       iend   = PetscMin(s, gxs + gnx - i - 1);
1662       slot   = i - gxs;
1663 
1664       cnt = 0;
1665       for (i1 = istart; i1 < iend + 1; i1++) {
1666         cols[cnt++] = nc * (slot + i1);
1667         for (l = 1; l < nc; l++) {
1668           cols[cnt] = 1 + cols[cnt - 1];
1669           cnt++;
1670         }
1671       }
1672       rows[0] = nc * (slot);
1673       for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1674       PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1675     }
1676     /* do not copy values to GPU since they are all zero and not yet needed there */
1677     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1678     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1679     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1680     if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1681     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1682     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1683     PetscCall(PetscFree2(rows, cols));
1684   }
1685   PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1686   PetscFunctionReturn(PETSC_SUCCESS);
1687 }
1688 
/*
  Builds the block nonzero structure of an (MPI)BAIJ matrix (block size nc) for a
  2d DMDA.  Pass 1 counts diagonal/off-diagonal block columns per block row with
  the MatPreallocate machinery; pass 2 (skipped when da->prealloc_only) inserts
  zero blocks to fix the pattern.  Both passes enumerate the same stencil.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, ii, jj;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc1(col * col * nc * nc, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* with periodic boundaries the full stencil always fits; otherwise clip at the physical domain edge */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      /* local (ghosted) index of this grid point = block row */
      slot   = i - gxs + gnx * (j - gys);

      /* Find block columns in block row */
      cnt = 0;
      for (ii = istart; ii < iend + 1; ii++) {
        for (jj = jstart; jj < jend + 1; jj++) {
          if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
            cols[cnt++] = slot + ii + gnx * jj;
          }
        }
      }
      PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* zero-filled values buffer large enough for one full stencil row of nc x nc blocks */
    PetscCall(PetscCalloc1(col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        slot   = i - gxs + gnx * (j - gys);
        cnt    = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
              cols[cnt++] = slot + ii + gnx * jj;
            }
          }
        }
        PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1780 
/*
  3d analogue of DMCreateMatrix_DA_2d_MPIBAIJ: preallocates and pattern-fills an
  (MPI)BAIJ matrix (block size nc) for a 3d DMDA.  STAR stencils keep only
  offsets lying on an axis-aligned line through the center, i.e. at least two of
  (ii, jj, kk) are zero.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* block-column index workspace: at most col^3 stencil points per block row */
  PetscCall(PetscMalloc1(col * col * col, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* with periodic boundaries the full stencil always fits; otherwise clip at the physical domain edge */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        /* local (ghosted) index of this grid point = block row */
        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

        /* Find block columns in block row */
        cnt = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            for (kk = kstart; kk < kend + 1; kk++) {
              if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
              }
            }
          }
        }
        PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* zero-filled values buffer large enough for one full stencil row of nc x nc blocks */
    PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
                }
              }
            }
          }
          PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1887 
1888 /*
1889   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
1890   identify in the local ordering with periodic domain.
1891 */
1892 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog, PetscInt *row, PetscInt *cnt, PetscInt col[])
1893 {
1894   PetscInt i, n;
1895 
1896   PetscFunctionBegin;
1897   PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, 1, row, row));
1898   PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, *cnt, col, col));
1899   for (i = 0, n = 0; i < *cnt; i++) {
1900     if (col[i] >= *row) col[n++] = col[i];
1901   }
1902   *cnt = n;
1903   PetscFunctionReturn(PETSC_SUCCESS);
1904 }
1905 
/*
  Builds the block nonzero structure of an (MPI)SBAIJ matrix (block size nc) for
  a 2d DMDA.  Like the BAIJ version, but after enumerating the stencil each row's
  column list is mapped to the global numbering and filtered by
  L2GFilterUpperTriangular() so only the upper triangle is preallocated/inserted.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, ii, jj;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc1(col * col * nc * nc, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* with periodic boundaries the full stencil always fits; otherwise clip at the physical domain edge */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      /* local (ghosted) index of this grid point = block row */
      slot   = i - gxs + gnx * (j - gys);

      /* Find block columns in block row */
      cnt = 0;
      for (ii = istart; ii < iend + 1; ii++) {
        for (jj = jstart; jj < jend + 1; jj++) {
          if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
        }
      }
      /* maps slot and cols to global block indices and drops sub-diagonal columns */
      PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
      PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* zero-filled values buffer large enough for one full stencil row of nc x nc blocks */
    PetscCall(PetscCalloc1(col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        slot   = i - gxs + gnx * (j - gys);

        /* Find block columns in block row */
        cnt = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
          }
        }
        PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
        /* slot/cols are now global block indices, so use the non-Local variant */
        PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1997 
/*
  3d analogue of DMCreateMatrix_DA_2d_MPISBAIJ: preallocates and pattern-fills an
  (MPI)SBAIJ matrix (block size nc) for a 3d DMDA, keeping only the upper
  triangle of block columns via L2GFilterUpperTriangular().
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the matrix */
  PetscCall(PetscMalloc1(col * col * col, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* with periodic boundaries the full stencil always fits; otherwise clip at the physical domain edge */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        /* local (ghosted) index of this grid point = block row */
        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

        /* Find block columns in block row */
        cnt = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            for (kk = kstart; kk < kend + 1; kk++) {
              /* STAR stencil keeps only offsets with at least two of (ii,jj,kk) zero */
              if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
            }
          }
        }
        /* maps slot and cols to global block indices and drops sub-diagonal columns */
        PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
        PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* zero-filled values buffer large enough for one full stencil row of nc x nc blocks */
    PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
              }
            }
          }
          PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
          /* slot/cols are now global block indices, so use the non-Local variant */
          PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2103 
/*
  DMCreateMatrix_DA_3d_MPIAIJ_Fill - Preallocates (and, unless da->prealloc_only,
  zero-fills and assembles) an AIJ matrix for a 3-dimensional DMDA, honoring the
  per-component fill patterns dd->dfill (couplings between components at the same
  grid point) and dd->ofill (couplings to components at neighboring grid points).

  Input Parameters:
.  da - the 3d DMDA
.  J  - the matrix; must accept MatSeqAIJSetPreallocation()/MatMPIAIJSetPreallocation()

  Notes:
  dfill/ofill are in the compressed format built by DMDASetBlockFills_Private():
  entries 0..nc hold row starts, the remainder are component column indices.
  NOTE(review): ofill is dereferenced unconditionally below — presumably callers
  only reach this routine after DMDASetBlockFills() has set it; confirm at call sites.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
  DM_DA                 *dd = (DM_DA *)da->data;
  PetscInt               ifill_col, *dfill = dd->dfill, *ofill = dd->ofill;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1; /* stencil extent in one direction: s points each side plus center */

  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* cols holds the column indices of one matrix row; worst case is every
     component of every stencil point coupling: col^3 grid points * nc components */
  PetscCall(PetscMalloc1(col * col * col * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  /* macro: allocates and zeroes the dnz/onz per-row counters freed by MatPreallocateEnd() */
  MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);

  PetscCall(MatSetBlockSize(J, nc));
  /* First pass: count nonzeros per row (split into diagonal/off-diagonal parts) */
  for (i = xs; i < xs + nx; i++) {
    /* clamp the stencil at physical boundaries unless the direction is periodic */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        /* local (ghosted) index of grid node (i,j,k) */
        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

        for (l = 0; l < nc; l++) { /* one matrix row per component at this node */
          cnt = 0;
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if (ii || jj || kk) { /* off-node neighbor: use the ofill coupling pattern */
                  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                    for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  }
                } else { /* the node itself: use dfill, or full nc coupling if no dfill given */
                  if (dfill) {
                    for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  } else {
                    for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  }
                }
              }
            }
          }
          row    = l + nc * (slot);
          maxcnt = PetscMax(maxcnt, cnt); /* track widest row to size the zero-values buffer later */
          if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
          else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
        }
      }
    }
  }
  /* only the call matching J's actual type takes effect; the other is a no-op */
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* Second pass: insert explicit zeros so the nonzero structure is fully realized.
       values stays all-zero; sized by the widest row found above. */
    PetscCall(PetscCalloc1(maxcnt, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          for (l = 0; l < nc; l++) {
            cnt = 0;
            /* identical column enumeration to the counting pass above */
            for (ii = istart; ii < iend + 1; ii++) {
              for (jj = jstart; jj < jend + 1; jj++) {
                for (kk = kstart; kk < kend + 1; kk++) {
                  if (ii || jj || kk) {
                    if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                      for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    }
                  } else {
                    if (dfill) {
                      for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    } else {
                      for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    }
                  }
                }
              }
            }
            row = l + nc * (slot);
            PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, values, INSERT_VALUES));
          }
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    /* lock the nonzero pattern: later insertions outside it are an error */
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2244