xref: /petsc/src/dm/impls/da/fdda.c (revision 1d27aa22b2f6148b2c4e3f06a75e0638d6493e09)
1 #include <petsc/private/dmdaimpl.h> /*I      "petscdmda.h"     I*/
2 #include <petscmat.h>
3 #include <petscbt.h>
4 
5 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM, ISColoringType, ISColoring *);
6 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM, ISColoringType, ISColoring *);
7 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM, ISColoringType, ISColoring *);
8 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM, ISColoringType, ISColoring *);
9 
/*
   For a ghost index i that may be negative or greater than the upper bound,
   maps it into the 0:m-1 range using periodicity.

   Every use of the parameters is parenthesized so that expression arguments
   (e.g. SetInRange(i - 1, m)) expand correctly; note that i and m are still
   evaluated more than once, so arguments must be free of side effects.
*/
#define SetInRange(i, m) (((i) < 0) ? (m) + (i) : (((i) >= (m)) ? (i) - (m) : (i)))
15 
16 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill, PetscInt w, PetscInt **rfill)
17 {
18   PetscInt i, j, nz, *fill;
19 
20   PetscFunctionBegin;
21   if (!dfill) PetscFunctionReturn(PETSC_SUCCESS);
22 
23   /* count number nonzeros */
24   nz = 0;
25   for (i = 0; i < w; i++) {
26     for (j = 0; j < w; j++) {
27       if (dfill[w * i + j]) nz++;
28     }
29   }
30   PetscCall(PetscMalloc1(nz + w + 1, &fill));
31   /* construct modified CSR storage of nonzero structure */
32   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
33    so fill[1] - fill[0] gives number of nonzeros in first row etc */
34   nz = w + 1;
35   for (i = 0; i < w; i++) {
36     fill[i] = nz;
37     for (j = 0; j < w; j++) {
38       if (dfill[w * i + j]) {
39         fill[nz] = j;
40         nz++;
41       }
42     }
43   }
44   fill[w] = nz;
45 
46   *rfill = fill;
47   PetscFunctionReturn(PETSC_SUCCESS);
48 }
49 
50 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse, PetscInt w, PetscInt **rfill)
51 {
52   PetscInt nz;
53 
54   PetscFunctionBegin;
55   if (!dfillsparse) PetscFunctionReturn(PETSC_SUCCESS);
56 
57   /* Determine number of non-zeros */
58   nz = (dfillsparse[w] - w - 1);
59 
60   /* Allocate space for our copy of the given sparse matrix representation. */
61   PetscCall(PetscMalloc1(nz + w + 1, rfill));
62   PetscCall(PetscArraycpy(*rfill, dfillsparse, nz + w + 1));
63   PetscFunctionReturn(PETSC_SUCCESS);
64 }
65 
/*
  Computes dd->ofillcols from dd->ofill: for each of the dd->w components, a
  1-based rank among the off-diagonal columns that contain at least one
  nonzero, or 0 if that column is entirely zero.  Requires dd->ofill to be
  set (the modified CSR form built by DMDASetBlockFills_Private()).
*/
static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
{
  PetscInt i, k, cnt = 1;

  PetscFunctionBegin;

  /* ofillcols tracks the columns of ofill that have any nonzero in them; the value in each location is the number of
   columns to the left with any nonzeros in them plus 1 */
  PetscCall(PetscCalloc1(dd->w, &dd->ofillcols));
  /* first pass: mark (with 1) every column that appears in some row of ofill */
  for (i = 0; i < dd->w; i++) {
    for (k = dd->ofill[i]; k < dd->ofill[i + 1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
  }
  /* second pass: replace each mark with its 1-based position among the marked columns */
  for (i = 0; i < dd->w; i++) {
    if (dd->ofillcols[i]) dd->ofillcols[i] = cnt++;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
83 
84 /*@
85   DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
86   of the matrix returned by `DMCreateMatrix()`.
87 
88   Logically Collective
89 
90   Input Parameters:
91 + da    - the distributed array
92 . dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
93 - ofill - the fill pattern in the off-diagonal blocks
94 
95   Level: developer
96 
97   Notes:
98   This only makes sense when you are doing multicomponent problems but using the
99   `MATMPIAIJ` matrix format
100 
101   The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
102   representing coupling and 0 entries for missing coupling. For example
103 .vb
104             dfill[9] = {1, 0, 0,
105                         1, 1, 0,
106                         0, 1, 1}
107 .ve
108   means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
109   itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
110   diagonal block).
111 
  `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns than
  can be represented in the dfill, ofill format
114 
115   Contributed by\:
116   Glenn Hammond
117 
118 .seealso: `DM`, `DMDA`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
119 @*/
PetscErrorCode DMDASetBlockFills(DM da, const PetscInt *dfill, const PetscInt *ofill)
{
  DM_DA *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /* save private copies of the given dfill and ofill information, converted to modified CSR form */
  PetscCall(DMDASetBlockFills_Private(dfill, dd->w, &dd->dfill));
  PetscCall(DMDASetBlockFills_Private(ofill, dd->w, &dd->ofill));

  /* count nonzeros in ofill columns (builds dd->ofillcols, used by the 1d coloring/preallocation) */
  PetscCall(DMDASetBlockFills_Private2(dd));
  PetscFunctionReturn(PETSC_SUCCESS);
}
133 
134 /*@
135   DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
136   of the matrix returned by `DMCreateMatrix()`, using sparse representations
137   of fill patterns.
138 
139   Logically Collective
140 
141   Input Parameters:
142 + da          - the distributed array
143 . dfillsparse - the sparse fill pattern in the diagonal block (may be `NULL`, means use dense block)
144 - ofillsparse - the sparse fill pattern in the off-diagonal blocks
145 
146   Level: developer
147 
148   Notes:
149   This only makes sense when you are doing multicomponent problems but using the
150   `MATMPIAIJ` matrix format
151 
152   The format for `dfill` and `ofill` is a sparse representation of a
153   dof-by-dof matrix with 1 entries representing coupling and 0 entries
154   for missing coupling.  The sparse representation is a 1 dimensional
155   array of length nz + dof + 1, where nz is the number of non-zeros in
156   the matrix.  The first dof entries in the array give the
157   starting array indices of each row's items in the rest of the array,
158   the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
159   and the remaining nz items give the column indices of each of
160   the 1s within the logical 2D matrix.  Each row's items within
161   the array are the column indices of the 1s within that row
162   of the 2D matrix.  PETSc developers may recognize that this is the
163   same format as that computed by the `DMDASetBlockFills_Private()`
164   function from a dense 2D matrix representation.
165 
  `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns than
  can be represented in the `dfill`, `ofill` format
168 
169   Contributed by\:
170   Philip C. Roth
171 
172 .seealso: `DM`, `DMDA`, `DMDASetBlockFills()`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
173 @*/
PetscErrorCode DMDASetBlockFillsSparse(DM da, const PetscInt *dfillsparse, const PetscInt *ofillsparse)
{
  DM_DA *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /* save private copies of the given dfill and ofill information (already in modified CSR form) */
  PetscCall(DMDASetBlockFillsSparse_Private(dfillsparse, dd->w, &dd->dfill));
  PetscCall(DMDASetBlockFillsSparse_Private(ofillsparse, dd->w, &dd->ofill));

  /* count nonzeros in ofill columns (builds dd->ofillcols, used by the 1d coloring/preallocation) */
  PetscCall(DMDASetBlockFills_Private2(dd));
  PetscFunctionReturn(PETSC_SUCCESS);
}
187 
/*
  Creates a coloring for the matrix associated with a DMDA by dispatching to
  the dimension-specific routine.  For BAIJ matrix types the DMDA is
  temporarily rescaled to one degree of freedom per grid point so the coloring
  refers to blocks rather than individual matrix entries.
*/
PetscErrorCode DMCreateColoring_DA(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt       dim, m, n, p, nc;
  DMBoundaryType bx, by, bz;
  MPI_Comm       comm;
  PetscMPIInt    size;
  PetscBool      isBAIJ;
  DM_DA         *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
                                  m
          ------------------------------------------------------
         |                                                     |
         |                                                     |
         |               ----------------------                |
         |               |                    |                |
      n  |           yn  |                    |                |
         |               |                    |                |
         |               .---------------------                |
         |             (xs,ys)     xn                          |
         |            .                                        |
         |         (gxs,gys)                                   |
         |                                                     |
          -----------------------------------------------------
  */

  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  PetscCall(DMDAGetInfo(da, &dim, NULL, NULL, NULL, &m, &n, &p, &nc, NULL, &bx, &by, &bz, NULL));

  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (ctype == IS_COLORING_LOCAL) {
    if (size == 1) {
      /* on a single process a ghosted (local) coloring coincides with a global one */
      ctype = IS_COLORING_GLOBAL;
    } else {
      /* periodic wrap onto the same process would alias both ends of the domain in the ghosted coloring */
      PetscCheck((dim == 1) || !((m == 1 && bx == DM_BOUNDARY_PERIODIC) || (n == 1 && by == DM_BOUNDARY_PERIODIC) || (p == 1 && bz == DM_BOUNDARY_PERIODIC)), PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain on the same process");
    }
  }

  /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
     matrices is for the blocks, not the individual matrix elements  */
  PetscCall(PetscStrbeginswith(da->mattype, MATBAIJ, &isBAIJ));
  if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATMPIBAIJ, &isBAIJ));
  if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATSEQBAIJ, &isBAIJ));
  if (isBAIJ) {
    /* temporarily rescale the x-direction ownership ranges to block units; undone after dispatch below */
    dd->w  = 1;
    dd->xs = dd->xs / nc;
    dd->xe = dd->xe / nc;
    dd->Xs = dd->Xs / nc;
    dd->Xe = dd->Xe / nc;
  }

  /*
     We do not provide a getcoloring function in the DMDA operations because
   the basic DMDA does not know about matrices. We think of DMDA as being
   more low-level than matrices.
  */
  if (dim == 1) PetscCall(DMCreateColoring_DA_1d_MPIAIJ(da, ctype, coloring));
  else if (dim == 2) PetscCall(DMCreateColoring_DA_2d_MPIAIJ(da, ctype, coloring));
  else if (dim == 3) PetscCall(DMCreateColoring_DA_3d_MPIAIJ(da, ctype, coloring));
  else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not done for %" PetscInt_FMT " dimension, send us mail petsc-maint@mcs.anl.gov for code", dim);
  if (isBAIJ) {
    /* restore the per-point degrees of freedom and x-direction ownership ranges */
    dd->w  = nc;
    dd->xs = dd->xs * nc;
    dd->xe = dd->xe * nc;
    dd->Xs = dd->Xs * nc;
    dd->Xe = dd->Xe * nc;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
263 
/*
  Coloring for a 2d DMDA with AIJ matrix format: each grid point's color is
  determined by its (i, j) position modulo col = 2*s + 1 and its component
  number, so two points within a stencil width of each other never share a
  color.  The coloring is cached on the DM_DA and reused on later calls.
*/
PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, M, N, dim, s, k, nc, col;
  PetscInt         ncolors = 0;
  MPI_Comm         comm;
  DMBoundaryType   bx, by;
  DMDAStencilType  st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;
  /* NOTE(review): unlike DMCreateColoring_DA_3d_MPIAIJ(), there is no check here that m and n
     are divisible by col for periodic boundaries -- confirm whether wrap-around color
     collisions are possible when m % col != 0 */
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* special case as taught to us by Paul Hovland: a width-1 star stencil needs only 5 colors */
  if (st == DMDA_STENCIL_STAR && s == 1) {
    PetscCall(DMCreateColoring_DA_2d_5pt_MPIAIJ(da, ctype, coloring));
  } else {
    if (ctype == IS_COLORING_GLOBAL) {
      if (!dd->localcoloring) {
        PetscCall(PetscMalloc1(nc * nx * ny, &colors));
        /* color = component + nc * (position within the col-by-col tile) */
        ii = 0;
        for (j = ys; j < ys + ny; j++) {
          for (i = xs; i < xs + nx; i++) {
            for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((i % col) + col * (j % col));
          }
        }
        ncolors = nc + nc * (col - 1 + col * (col - 1));
        PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
      }
      *coloring = dd->localcoloring;
    } else if (ctype == IS_COLORING_LOCAL) {
      if (!dd->ghostedcoloring) {
        PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
        ii = 0;
        for (j = gys; j < gys + gny; j++) {
          for (i = gxs; i < gxs + gnx; i++) {
            for (k = 0; k < nc; k++) {
              /* the complicated stuff is to handle periodic boundaries */
              colors[ii++] = k + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col));
            }
          }
        }
        ncolors = nc + nc * (col - 1 + col * (col - 1));
        PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
        /* PetscIntView(ncolors,(PetscInt*)colors,0); */

        PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
      }
      *coloring = dd->ghostedcoloring;
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  }
  /* the cached coloring keeps one reference; the caller gets its own */
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(PETSC_SUCCESS);
}
327 
/*
  Coloring for a 3d DMDA with AIJ matrix format: each grid point's color is
  determined by its (i, j, k) position modulo col = 2*s + 1 and its component
  number.  Periodic directions must have grid sizes divisible by col so the
  tiling wraps consistently.  The coloring is cached on the DM_DA.
*/
PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, gxs, gys, gnx, gny, m, n, p, dim, s, k, nc, col, zs, gzs, ii, l, nz, gnz, M, N, P;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx, by, bz;
  DMDAStencilType  st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;
  /* the modular tiling only wraps without color collisions when col divides the grid size */
  PetscCheck(bx != DM_BOUNDARY_PERIODIC || (m % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in X is divisible\n\
                 by 2*stencil_width + 1\n");
  PetscCheck(by != DM_BOUNDARY_PERIODIC || (n % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Y is divisible\n\
                 by 2*stencil_width + 1\n");
  PetscCheck(bz != DM_BOUNDARY_PERIODIC || (p % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Z is divisible\n\
                 by 2*stencil_width + 1\n");

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx * ny * nz, &colors));
      /* color = component + nc * (position within the col*col*col tile) */
      ii = 0;
      for (k = zs; k < zs + nz; k++) {
        for (j = ys; j < ys + ny; j++) {
          for (i = xs; i < xs + nx; i++) {
            for (l = 0; l < nc; l++) colors[ii++] = l + nc * ((i % col) + col * (j % col) + col * col * (k % col));
          }
        }
      }
      ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny * nz, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx * gny * gnz, &colors));
      ii = 0;
      for (k = gzs; k < gzs + gnz; k++) {
        for (j = gys; j < gys + gny; j++) {
          for (i = gxs; i < gxs + gnx; i++) {
            for (l = 0; l < nc; l++) {
              /* the complicated stuff is to handle periodic boundaries */
              colors[ii++] = l + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col) + col * col * (SetInRange(k, p) % col));
            }
          }
        }
      }
      ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny * gnz, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  /* the cached coloring keeps one reference; the caller gets its own */
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(PETSC_SUCCESS);
}
395 
/*
  Coloring for a 1d DMDA with AIJ matrix format.  Colors repeat with period
  col = 2*s + 1 so points within a stencil width never share a color.  When
  DMDASetBlockFills() has recorded per-component off-diagonal coupling
  (dd->ofillcols), components with no off-diagonal coupling reuse the base
  colors, reducing the total color count.  The coloring is cached on the DM_DA.
*/
PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, nx, i, i1, gxs, gnx, l, m, M, dim, s, nc, col;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, &M, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  col = 2 * s + 1;
  /* NOTE(review): no check that m is divisible by col for periodic bx, unlike the 3d
     routine -- confirm wrap-around colors cannot collide when m % col != 0 */
  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx, &colors));
      if (dd->ofillcols) {
        /* tc = number of components with at least one off-diagonal-block nonzero */
        PetscInt tc = 0;
        for (i = 0; i < nc; i++) tc += (PetscInt)(dd->ofillcols[i] > 0);
        i1 = 0;
        for (i = xs; i < xs + nx; i++) {
          for (l = 0; l < nc; l++) {
            if (dd->ofillcols[l] && (i % col)) {
              /* coupled component away from the tile origin: draw from the tc extra
                 colors reserved for each of the col-1 nonzero stencil offsets */
              colors[i1++] = nc - 1 + tc * ((i % col) - 1) + dd->ofillcols[l];
            } else {
              /* uncoupled component (or tile origin): reuse base color l */
              colors[i1++] = l;
            }
          }
        }
        ncolors = nc + 2 * s * tc;
      } else {
        /* dense coupling: color = component + nc * (position within the width-col tile) */
        i1 = 0;
        for (i = xs; i < xs + nx; i++) {
          for (l = 0; l < nc; l++) colors[i1++] = l + nc * (i % col);
        }
        ncolors = nc + nc * (col - 1);
      }
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx, &colors));
      i1 = 0;
      for (i = gxs; i < gxs + gnx; i++) {
        for (l = 0; l < nc; l++) {
          /* the complicated stuff is to handle periodic boundaries */
          colors[i1++] = l + nc * (SetInRange(i, m) % col);
        }
      }
      ncolors = nc + nc * (col - 1);
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  /* the cached coloring keeps one reference; the caller gets its own */
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(PETSC_SUCCESS);
}
463 
/*
  Specialized coloring for a 2d DMDA with a 5-point (star, width-1) stencil:
  the color pattern (3*j + i) mod 5 changes by 1 mod 5 for the east/west
  neighbors and by 3 mod 5 for the north/south neighbors, so only 5*nc colors
  are needed.  Called from DMCreateColoring_DA_2d_MPIAIJ(), which takes the
  caller's reference on the returned (cached) coloring.
*/
PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, dim, s, k, nc;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx, by;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, NULL));
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx * ny, &colors));
      ii = 0;
      for (j = ys; j < ys + ny; j++) {
        for (i = xs; i < xs + nx; i++) {
          for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * j + i) % 5);
        }
      }
      ncolors = 5 * nc;
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
      ii = 0;
      for (j = gys; j < gys + gny; j++) {
        for (i = gxs; i < gxs + gnx; i++) {
          /* SetInRange maps ghost indices back into the domain for periodic boundaries */
          for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * SetInRange(j, n) + SetInRange(i, m)) % 5);
        }
      }
      ncolors = 5 * nc;
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  /* note: no ISColoringReference() here -- the caller (DMCreateColoring_DA_2d_MPIAIJ) takes it */
  PetscFunctionReturn(PETSC_SUCCESS);
}
513 
514 /* =========================================================================== */
515 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM, Mat);
516 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM, Mat);
517 extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM, Mat);
518 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM, Mat);
519 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM, Mat);
520 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM, Mat);
521 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM, Mat);
522 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM, Mat);
523 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM, Mat);
524 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM, Mat);
525 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM, Mat);
526 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM, Mat);
527 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM, Mat);
528 extern PetscErrorCode DMCreateMatrix_DA_IS(DM, Mat);
529 
/*
  MatView() wrapper for matrices created on a DMDA: permutes the matrix into
  the DMDA's natural ordering via the DM's application ordering (AO) before
  viewing, so the output is independent of the parallel layout.
*/
static PetscErrorCode MatView_MPI_DA(Mat A, PetscViewer viewer)
{
  DM                da;
  const char       *prefix;
  Mat               AA, Anatural;
  AO                ao;
  PetscInt          rstart, rend, *petsc, i;
  IS                is;
  MPI_Comm          comm;
  PetscViewerFormat format;
  PetscBool         flag;

  PetscFunctionBegin;
  /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
  PetscCall(PetscViewerGetFormat(viewer, &format));
  if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(MatGetDM(A, &da));
  PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");

  /* build an index set mapping this process's rows to their natural-ordering positions */
  PetscCall(DMDAGetAO(da, &ao));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
  PetscCall(PetscMalloc1(rend - rstart, &petsc));
  for (i = rstart; i < rend; i++) petsc[i - rstart] = i;
  PetscCall(AOApplicationToPetsc(ao, rend - rstart, petsc));
  /* PETSC_OWN_POINTER: the IS takes ownership of petsc[]; no separate free needed */
  PetscCall(ISCreateGeneral(comm, rend - rstart, petsc, PETSC_OWN_POINTER, &is));

  /* call viewer on natural ordering */
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPISELL, &flag));
  if (flag) {
    /* NOTE(review): MPISELL is converted to AIJ before the permutation -- presumably
       the sub-matrix extraction below is unsupported for MATMPISELL; confirm */
    PetscCall(MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &AA));
    A = AA;
  }
  PetscCall(MatCreateSubMatrix(A, is, is, MAT_INITIAL_MATRIX, &Anatural));
  PetscCall(ISDestroy(&is));
  PetscCall(PetscObjectGetOptionsPrefix((PetscObject)A, &prefix));
  PetscCall(PetscObjectSetOptionsPrefix((PetscObject)Anatural, prefix));
  PetscCall(PetscObjectSetName((PetscObject)Anatural, ((PetscObject)A)->name));
  /* suppress the class/name/prefix banner so the output looks like that of the original matrix */
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
  PetscCall(MatView(Anatural, viewer));
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
  PetscCall(MatDestroy(&Anatural));
  if (flag) PetscCall(MatDestroy(&AA));
  PetscFunctionReturn(PETSC_SUCCESS);
}
576 
/*
  MatLoad() wrapper for matrices created on a DMDA: loads the matrix in the
  natural ordering stored in the file, then permutes it into the DMDA's
  application ordering and replaces A's header with the permuted matrix.
*/
static PetscErrorCode MatLoad_MPI_DA(Mat A, PetscViewer viewer)
{
  DM       da;
  Mat      Anatural, Aapp;
  AO       ao;
  PetscInt rstart, rend, *app, i, m, n, M, N;
  IS       is;
  MPI_Comm comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(MatGetDM(A, &da));
  PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");

  /* Load the matrix in natural ordering, reusing A's type and layout */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Anatural));
  PetscCall(MatSetType(Anatural, ((PetscObject)A)->type_name));
  PetscCall(MatGetSize(A, &M, &N));
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(MatSetSizes(Anatural, m, n, M, N));
  PetscCall(MatLoad(Anatural, viewer));

  /* Map natural ordering to application ordering and create IS */
  PetscCall(DMDAGetAO(da, &ao));
  PetscCall(MatGetOwnershipRange(Anatural, &rstart, &rend));
  PetscCall(PetscMalloc1(rend - rstart, &app));
  for (i = rstart; i < rend; i++) app[i - rstart] = i;
  PetscCall(AOPetscToApplication(ao, rend - rstart, app));
  /* PETSC_OWN_POINTER: the IS takes ownership of app[]; no separate free needed */
  PetscCall(ISCreateGeneral(comm, rend - rstart, app, PETSC_OWN_POINTER, &is));

  /* Do permutation and replace header: the caller's A becomes the permuted matrix in place */
  PetscCall(MatCreateSubMatrix(Anatural, is, is, MAT_INITIAL_MATRIX, &Aapp));
  PetscCall(MatHeaderReplace(A, &Aapp));
  PetscCall(ISDestroy(&is));
  PetscCall(MatDestroy(&Anatural));
  PetscFunctionReturn(PETSC_SUCCESS);
}
614 
615 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
616 {
617   PetscInt    dim, dof, nx, ny, nz, dims[3], starts[3], M, N, P;
618   Mat         A;
619   MPI_Comm    comm;
620   MatType     Atype;
621   MatType     mtype;
622   PetscMPIInt size;
623   DM_DA      *dd    = (DM_DA *)da->data;
624   void (*aij)(void) = NULL, (*baij)(void) = NULL, (*sbaij)(void) = NULL, (*sell)(void) = NULL, (*is)(void) = NULL;
625 
626   PetscFunctionBegin;
627   PetscCall(MatInitializePackage());
628   mtype = da->mattype;
629 
630   /*
631                                   m
632           ------------------------------------------------------
633          |                                                     |
634          |                                                     |
635          |               ----------------------                |
636          |               |                    |                |
637       n  |           ny  |                    |                |
638          |               |                    |                |
639          |               .---------------------                |
640          |             (xs,ys)     nx                          |
641          |            .                                        |
642          |         (gxs,gys)                                   |
643          |                                                     |
644           -----------------------------------------------------
645   */
646 
647   /*
648          nc - number of components per grid point
649          col - number of colors needed in one direction for single component problem
650   */
651   M   = dd->M;
652   N   = dd->N;
653   P   = dd->P;
654   dim = da->dim;
655   dof = dd->w;
656   /* PetscCall(DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); */
657   PetscCall(DMDAGetCorners(da, NULL, NULL, NULL, &nx, &ny, &nz));
658   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
659   PetscCall(MatCreate(comm, &A));
660   PetscCall(MatSetSizes(A, dof * nx * ny * nz, dof * nx * ny * nz, dof * M * N * P, dof * M * N * P));
661   PetscCall(MatSetType(A, mtype));
662   PetscCall(MatSetFromOptions(A));
663   if (dof * nx * ny * nz < da->bind_below) {
664     PetscCall(MatSetBindingPropagates(A, PETSC_TRUE));
665     PetscCall(MatBindToCPU(A, PETSC_TRUE));
666   }
667   PetscCall(MatSetDM(A, da));
668   if (da->structure_only) PetscCall(MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE));
669   PetscCall(MatGetType(A, &Atype));
670   /*
671      We do not provide a getmatrix function in the DMDA operations because
672    the basic DMDA does not know about matrices. We think of DMDA as being more
673    more low-level than matrices. This is kind of cheating but, cause sometimes
674    we think of DMDA has higher level than matrices.
675 
676      We could switch based on Atype (or mtype), but we do not since the
677    specialized setting routines depend only on the particular preallocation
678    details of the matrix, not the type itself.
679   */
680   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJSetPreallocation_C", &aij));
681   if (!aij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJSetPreallocation_C", &aij));
682   if (!aij) {
683     PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIBAIJSetPreallocation_C", &baij));
684     if (!baij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", &baij));
685     if (!baij) {
686       PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISBAIJSetPreallocation_C", &sbaij));
687       if (!sbaij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSBAIJSetPreallocation_C", &sbaij));
688       if (!sbaij) {
689         PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", &sell));
690         if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSELLSetPreallocation_C", &sell));
691       }
692       if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatISSetPreallocation_C", &is));
693     }
694   }
695   if (aij) {
696     if (dim == 1) {
697       if (dd->ofill) {
698         PetscCall(DMCreateMatrix_DA_1d_MPIAIJ_Fill(da, A));
699       } else {
700         DMBoundaryType bx;
701         PetscMPIInt    size;
702         PetscCall(DMDAGetInfo(da, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &bx, NULL, NULL, NULL));
703         PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));
704         if (size == 1 && bx == DM_BOUNDARY_NONE) {
705           PetscCall(DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da, A));
706         } else {
707           PetscCall(DMCreateMatrix_DA_1d_MPIAIJ(da, A));
708         }
709       }
710     } else if (dim == 2) {
711       if (dd->ofill) {
712         PetscCall(DMCreateMatrix_DA_2d_MPIAIJ_Fill(da, A));
713       } else {
714         PetscCall(DMCreateMatrix_DA_2d_MPIAIJ(da, A));
715       }
716     } else if (dim == 3) {
717       if (dd->ofill) {
718         PetscCall(DMCreateMatrix_DA_3d_MPIAIJ_Fill(da, A));
719       } else {
720         PetscCall(DMCreateMatrix_DA_3d_MPIAIJ(da, A));
721       }
722     }
723   } else if (baij) {
724     if (dim == 2) {
725       PetscCall(DMCreateMatrix_DA_2d_MPIBAIJ(da, A));
726     } else if (dim == 3) {
727       PetscCall(DMCreateMatrix_DA_3d_MPIBAIJ(da, A));
728     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
729   } else if (sbaij) {
730     if (dim == 2) {
731       PetscCall(DMCreateMatrix_DA_2d_MPISBAIJ(da, A));
732     } else if (dim == 3) {
733       PetscCall(DMCreateMatrix_DA_3d_MPISBAIJ(da, A));
734     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
735   } else if (sell) {
736     if (dim == 2) {
737       PetscCall(DMCreateMatrix_DA_2d_MPISELL(da, A));
738     } else if (dim == 3) {
739       PetscCall(DMCreateMatrix_DA_3d_MPISELL(da, A));
740     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
741   } else if (is) {
742     PetscCall(DMCreateMatrix_DA_IS(da, A));
743   } else {
744     ISLocalToGlobalMapping ltog;
745 
746     PetscCall(MatSetBlockSize(A, dof));
747     PetscCall(MatSetUp(A));
748     PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
749     PetscCall(MatSetLocalToGlobalMapping(A, ltog, ltog));
750   }
751   PetscCall(DMDAGetGhostCorners(da, &starts[0], &starts[1], &starts[2], &dims[0], &dims[1], &dims[2]));
752   PetscCall(MatSetStencil(A, dim, dims, starts, dof));
753   PetscCall(MatSetDM(A, da));
754   PetscCallMPI(MPI_Comm_size(comm, &size));
755   if (size > 1) {
756     /* change viewer to display matrix in natural ordering */
757     PetscCall(MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA));
758     PetscCall(MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA));
759   }
760   *J = A;
761   PetscFunctionReturn(PETSC_SUCCESS);
762 }
763 
764 PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]);
765 
/*
  DMCreateMatrix_DA_IS - Sets up the local-to-global mapping and (optionally) the
  preallocation of a MATIS matrix associated with a DMDA.

  The l2g map of the DMDA contains all ghosted nodes; the element connectivity from
  DMDAGetElements() may use only a subset of them, so indices not touched by any local
  element are filtered out (set to -1) before building the mapping used by the MATIS.
  A MATPREALLOCATOR matrix is then used to count the nonzeros of the local matrix.
*/
PetscErrorCode DMCreateMatrix_DA_IS(DM dm, Mat J)
{
  DM_DA                 *da = (DM_DA *)dm->data;
  Mat                    lJ, P; /* local matrix of J; MATPREALLOCATOR helper */
  ISLocalToGlobalMapping ltog;
  IS                     is;
  PetscBT                bt; /* one bit per (block) node of the ghosted local space */
  const PetscInt        *e_loc, *idx;
  PetscInt               i, nel, nen, nv, dof, *gidx, n, N;

  /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
     We need to filter out the local indices that are not represented through the DMDAGetElements decomposition */
  PetscFunctionBegin;
  dof = da->w; /* number of degrees of freedom per grid point */
  PetscCall(MatSetBlockSize(J, dof));
  PetscCall(DMGetLocalToGlobalMapping(dm, &ltog));

  /* flag local elements indices in local DMDA numbering */
  PetscCall(ISLocalToGlobalMappingGetSize(ltog, &nv)); /* nv counts scalar indices; nv/dof is the number of block nodes */
  PetscCall(PetscBTCreate(nv / dof, &bt));
  PetscCall(DMDAGetElements(dm, &nel, &nen, &e_loc)); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
  for (i = 0; i < nel * nen; i++) PetscCall(PetscBTSet(bt, e_loc[i]));

  /* filter out (set to -1) the global indices not used by the local elements */
  PetscCall(PetscMalloc1(nv / dof, &gidx));
  PetscCall(ISLocalToGlobalMappingGetBlockIndices(ltog, &idx));
  PetscCall(PetscArraycpy(gidx, idx, nv / dof));
  PetscCall(ISLocalToGlobalMappingRestoreBlockIndices(ltog, &idx));
  for (i = 0; i < nv / dof; i++)
    if (!PetscBTLookup(bt, i)) gidx[i] = -1;
  PetscCall(PetscBTDestroy(&bt));
  /* build the (filtered) block mapping used by the MATIS; -1 entries are ignored by MatSetValuesLocal() */
  PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)dm), dof, nv / dof, gidx, PETSC_OWN_POINTER, &is));
  PetscCall(ISLocalToGlobalMappingCreateIS(is, &ltog));
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
  PetscCall(ISLocalToGlobalMappingDestroy(&ltog));
  PetscCall(ISDestroy(&is));

  /* Preallocation */
  if (dm->prealloc_skip) {
    PetscCall(MatSetUp(J));
  } else {
    /* Run the element loop once through a MATPREALLOCATOR of the same local size as lJ
       to count the nonzeros, then transfer the pattern to the local matrix */
    PetscCall(MatISGetLocalMat(J, &lJ));
    PetscCall(MatGetLocalToGlobalMapping(lJ, &ltog, NULL));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)lJ), &P));
    PetscCall(MatSetType(P, MATPREALLOCATOR));
    PetscCall(MatSetLocalToGlobalMapping(P, ltog, ltog));
    PetscCall(MatGetSize(lJ, &N, NULL));
    PetscCall(MatGetLocalSize(lJ, &n, NULL));
    PetscCall(MatSetSizes(P, n, n, N, N));
    PetscCall(MatSetBlockSize(P, dof));
    PetscCall(MatSetUp(P));
    /* insert the (dense) element blocks; values are not needed, only the pattern (NULL values) */
    for (i = 0; i < nel; i++) PetscCall(MatSetValuesBlockedLocal(P, nen, e_loc + i * nen, nen, e_loc + i * nen, NULL, INSERT_VALUES));
    PetscCall(MatPreallocatorPreallocate(P, (PetscBool)!da->prealloc_only, lJ));
    PetscCall(MatISRestoreLocalMat(J, &lJ));
    PetscCall(DMDARestoreElements(dm, &nel, &nen, &e_loc));
    PetscCall(MatDestroy(&P));

    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
828 
/*
  DMCreateMatrix_DA_2d_MPISELL - Preallocates (and, unless da->prealloc_only is set,
  fills with zeros) a SELL matrix for a 2d DMDA with either box or star stencil.

  The stencil extent in each direction is clipped at physical (non-periodic) boundaries;
  for periodic boundaries the full width is kept and the local-to-global mapping handles
  the wrap-around.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p;
  PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1; /* full stencil width in one direction */
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* cols can hold one full box row: col*col stencil points times nc*nc couplings */
  PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* stencil extent in x: clipped at the domain ends unless periodic */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      slot = i - gxs + gnx * (j - gys); /* local (ghosted) index of node (i,j) */

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      cnt = 0;
      for (k = 0; k < nc; k++) {
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc * (slot + gnx * l + p);
            }
          }
        }
        rows[k] = k + nc * (slot);
      }
      PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  /* both variants are called; only the one matching the actual matrix type takes effect */
  PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
  PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* zero block large enough for one row group: nc rows by at most col*col*nc columns */
    PetscCall(PetscCalloc1(col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        cnt = 0;
        for (k = 0; k < nc; k++) {
          for (l = lstart; l < lend + 1; l++) {
            for (p = pstart; p < pend + 1; p++) {
              if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
                cols[cnt++] = k + nc * (slot + gnx * l + p);
              }
            }
          }
          rows[k] = k + nc * (slot);
        }
        PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
929 
930 PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da, Mat J)
931 {
932   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
933   PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
934   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
935   MPI_Comm               comm;
936   PetscScalar           *values;
937   DMBoundaryType         bx, by, bz;
938   ISLocalToGlobalMapping ltog;
939   DMDAStencilType        st;
940 
941   PetscFunctionBegin;
942   /*
943          nc - number of components per grid point
944          col - number of colors needed in one direction for single component problem
945   */
946   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
947   col = 2 * s + 1;
948   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
949   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
950   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
951 
952   PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
953   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
954 
955   PetscCall(MatSetBlockSize(J, nc));
956   /* determine the matrix preallocation information */
957   MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
958   for (i = xs; i < xs + nx; i++) {
959     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
960     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
961     for (j = ys; j < ys + ny; j++) {
962       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
963       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
964       for (k = zs; k < zs + nz; k++) {
965         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
966         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
967 
968         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
969 
970         cnt = 0;
971         for (l = 0; l < nc; l++) {
972           for (ii = istart; ii < iend + 1; ii++) {
973             for (jj = jstart; jj < jend + 1; jj++) {
974               for (kk = kstart; kk < kend + 1; kk++) {
975                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
976                   cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
977                 }
978               }
979             }
980           }
981           rows[l] = l + nc * (slot);
982         }
983         PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
984       }
985     }
986   }
987   PetscCall(MatSetBlockSize(J, nc));
988   PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
989   PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
990   MatPreallocateEnd(dnz, onz);
991   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
992 
993   /*
994     For each node in the grid: we get the neighbors in the local (on processor ordering
995     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
996     PETSc ordering.
997   */
998   if (!da->prealloc_only) {
999     PetscCall(PetscCalloc1(col * col * col * nc * nc * nc, &values));
1000     for (i = xs; i < xs + nx; i++) {
1001       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1002       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1003       for (j = ys; j < ys + ny; j++) {
1004         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1005         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1006         for (k = zs; k < zs + nz; k++) {
1007           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1008           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1009 
1010           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1011 
1012           cnt = 0;
1013           for (l = 0; l < nc; l++) {
1014             for (ii = istart; ii < iend + 1; ii++) {
1015               for (jj = jstart; jj < jend + 1; jj++) {
1016                 for (kk = kstart; kk < kend + 1; kk++) {
1017                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1018                     cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
1019                   }
1020                 }
1021               }
1022             }
1023             rows[l] = l + nc * (slot);
1024           }
1025           PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
1026         }
1027       }
1028     }
1029     PetscCall(PetscFree(values));
1030     /* do not copy values to GPU since they are all zero and not yet needed there */
1031     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1032     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1033     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1034     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1035     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1036   }
1037   PetscCall(PetscFree2(rows, cols));
1038   PetscFunctionReturn(PETSC_SUCCESS);
1039 }
1040 
/*
  DMCreateMatrix_DA_2d_MPIAIJ - Preallocates (and, unless da->prealloc_only is set,
  fills with zeros) an AIJ matrix for a 2d DMDA with either box or star stencil.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, M, N;
  PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
  MPI_Comm               comm;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog, mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE, alreadyboundtocpu = PETSC_TRUE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  /* columns are generated in sorted order below only without periodic wrap-around */
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
  col = 2 * s + 1; /* full stencil width in one direction */
  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* cols can hold one full box row: col*col stencil points times nc*nc couplings */
  PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* stencil extent in x: clipped at the domain ends unless periodic */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      slot = i - gxs + gnx * (j - gys); /* local (ghosted) index of node (i,j) */

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      cnt = 0;
      for (k = 0; k < nc; k++) {
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc * (slot + gnx * l + p);
            }
          }
        }
        rows[k] = k + nc * (slot);
      }
      if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  /* both variants are called; only the one matching the actual matrix type takes effect */
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  /* keep a mapping the matrix may already have, otherwise install the DMDA one */
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        cnt = 0;
        /* here the nc components of each stencil point are emitted consecutively,
           producing sorted column indices (cf. MAT_SORTED_FULL above) */
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = nc * (slot + gnx * l + p);
              for (k = 1; k < nc; k++) {
                cols[cnt] = 1 + cols[cnt - 1];
                cnt++;
              }
            }
          }
        }
        for (k = 0; k < nc; k++) rows[k] = k + nc * (slot);
        PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBoundToCPU(J, &alreadyboundtocpu));
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (!alreadyboundtocpu) PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1151 
/*
  DMCreateMatrix_DA_2d_MPIAIJ_Fill - Like DMCreateMatrix_DA_2d_MPIAIJ but honors the
  per-component coupling patterns set with DMDASetBlockFills(): dd->dfill gives the
  couplings within a grid point (diagonal block) and dd->ofill the couplings to
  neighboring grid points, both stored in the modified CSR layout built by
  DMDASetBlockFills_Private().
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, M, N;
  PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
  DM_DA                 *dd = (DM_DA *)da->data;
  PetscInt               ifill_col, *ofill = dd->ofill, *dfill = dd->dfill;
  MPI_Comm               comm;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1; /* full stencil width in one direction */
  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* one scalar row of component k couples to at most nc components at each of col*col stencil points */
  PetscCall(PetscMalloc1(col * col * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* stencil extent in x: clipped at the domain ends unless periodic */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      slot = i - gxs + gnx * (j - gys); /* local (ghosted) index of node (i,j) */

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      /* one scalar row at a time: component k of node (i,j) */
      for (k = 0; k < nc; k++) {
        cnt = 0;
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if (l || p) {
              /* off-diagonal grid point: use the ofill pattern */
              if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
                for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
              }
            } else {
              /* same grid point: use dfill if given, otherwise full nc x nc coupling */
              if (dfill) {
                for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
              } else {
                for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
              }
            }
          }
        }
        row    = k + nc * (slot);
        maxcnt = PetscMax(maxcnt, cnt);
        if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
        else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  /* both variants are called; only the one matching the actual matrix type takes effect */
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        for (k = 0; k < nc; k++) {
          cnt = 0;
          for (l = lstart; l < lend + 1; l++) {
            for (p = pstart; p < pend + 1; p++) {
              if (l || p) {
                if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
                  for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
                }
              } else {
                if (dfill) {
                  for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
                } else {
                  for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
                }
              }
            }
          }
          row = k + nc * (slot);
          /* NULL values: only the nonzero pattern is inserted */
          PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        }
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1275 
/*
  DMCreateMatrix_DA_3d_MPIAIJ - Preallocates (and, unless da->prealloc_only is set,
  fills with zeros) an AIJ matrix for a 3d DMDA with either box or star stencil.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
  MPI_Comm               comm;
  DMBoundaryType         bx, by, bz;
  ISLocalToGlobalMapping ltog, mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  /* columns are generated in sorted order below only without periodic wrap-around */
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
  col = 2 * s + 1; /* full stencil width in one direction */

  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* cols can hold one full box row: col^3 stencil points times nc*nc couplings */
  PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* stencil extent in each direction: clipped at the domain ends unless periodic */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); /* local (ghosted) index of node (i,j,k) */

        cnt = 0;
        for (l = 0; l < nc; l++) {
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                }
              }
            }
          }
          rows[l] = l + nc * (slot);
        }
        if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
        else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  /* both variants are called; only the one matching the actual matrix type takes effect */
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  /* keep a mapping the matrix may already have, otherwise install the DMDA one */
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          /* note the loop nest here is kk/jj/ii (outermost-to-innermost in increasing
             local-index stride) with the nc components innermost, producing sorted
             column indices (cf. MAT_SORTED_FULL above) */
          for (kk = kstart; kk < kend + 1; kk++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (ii = istart; ii < iend + 1; ii++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  for (l = 1; l < nc; l++) {
                    cols[cnt] = 1 + cols[cnt - 1];
                    cnt++;
                  }
                }
              }
            }
          }
          rows[0] = nc * (slot);
          for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
          PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
        }
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1400 
/*
  Preallocates (and, unless da->prealloc_only, fills with explicit zeros) the AIJ Jacobian
  of a 1d DMDA using the per-component coupling ("fill") information the user supplied:
  dd->dfill describes which components couple within a grid point, dd->ofill which couple
  to neighboring grid points.  Both use the modified CSR layout produced by
  DMDASetBlockFills_Private(): entries fill[j] .. fill[j+1]-1 list the coupled components
  of component j.
*/
PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da, Mat J)
{
  DM_DA                 *dd = (DM_DA *)da->data;
  PetscInt               xs, nx, i, j, gxs, gnx, row, k, l;
  PetscInt               m, dim, s, *cols = NULL, nc, cnt, maxcnt = 0, *ocols;
  PetscInt              *ofill = dd->ofill, *dfill = dd->dfill;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog;
  PetscMPIInt            rank, size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)da), &rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));

  /*
         nc - number of components per grid point
         s  - stencil width; only s <= 1 is supported here (checked below)
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  PetscCheck(s <= 1, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Matrix creation for 1d not implemented correctly for stencil width larger than 1");
  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

  PetscCall(MatSetBlockSize(J, nc));
  /* cols[r]  = number of nonzeros in the diagonal (on-process) portion of local row r,
     ocols[r] = number in the off-process portion; both zeroed by PetscCalloc2 */
  PetscCall(PetscCalloc2(nx * nc, &cols, nx * nc, &ocols));

  /*
        note should be smaller for first and last process with no periodic
        does not handle dfill
  */
  cnt = 0;
  /* rows in the first s grid points: may couple with the process to the left */
  for (i = 0; i < s; i++) {
    for (j = 0; j < nc; j++) {
      /* NOTE(review): dfill is dereferenced unconditionally in these counting loops,
         while the insertion loops below guard with "if (dfill)" -- confirm dfill is
         always non-NULL whenever this routine is selected */
      ocols[cnt] = ((rank == 0) ? 0 : (s - i) * (ofill[j + 1] - ofill[j]));
      cols[cnt]  = dfill[j + 1] - dfill[j] + (s + i) * (ofill[j + 1] - ofill[j]);
      if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        /* periodic wrap: couplings land off-process when the domain is split, on-process otherwise */
        if (size > 1) ocols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
        else cols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
      }
      maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
      cnt++;
    }
  }
  /* interior grid points: full stencil of 2*s neighbors plus the in-point couplings */
  for (i = s; i < nx - s; i++) {
    for (j = 0; j < nc; j++) {
      cols[cnt] = dfill[j + 1] - dfill[j] + 2 * s * (ofill[j + 1] - ofill[j]);
      maxcnt    = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
      cnt++;
    }
  }
  /* rows in the last s grid points: may couple with the process to the right */
  for (i = nx - s; i < nx; i++) {
    for (j = 0; j < nc; j++) {
      ocols[cnt] = ((rank == (size - 1)) ? 0 : (i - nx + s + 1) * (ofill[j + 1] - ofill[j]));
      cols[cnt]  = dfill[j + 1] - dfill[j] + (s + nx - i - 1) * (ofill[j + 1] - ofill[j]);
      if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        if (size > 1) ocols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
        else cols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
      }
      maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
      cnt++;
    }
  }

  PetscCall(MatSeqAIJSetPreallocation(J, 0, cols));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, cols, 0, ocols));
  PetscCall(PetscFree2(cols, ocols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* reuse cols as scratch for the (global) column indices of one row; maxcnt bounds
       the widest row found during counting */
    PetscCall(PetscMalloc1(maxcnt, &cols));
    row = xs * nc;
    /* coupling with process to the left */
    for (i = xs; i < xs + s; i++) {
      for (j = 0; j < nc; j++) {
        cnt = 0;
        if (rank) {
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
          }
        }
        if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
          /* periodic wrap: left neighbors live at the far (right) end of the domain */
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (m + i - s - l) * nc + ofill[k];
          }
        }
        if (dfill) {
          for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
        } else {
          for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
        }
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
        }
        /* NULL values: only the sparsity pattern is inserted (explicit zeros) */
        PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        row++;
      }
    }
    /* interior rows */
    for (i = xs + s; i < xs + nx - s; i++) {
      for (j = 0; j < nc; j++) {
        cnt = 0;
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
        }
        if (dfill) {
          for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
        } else {
          for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
        }
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
        }
        PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        row++;
      }
    }
    /* coupling with process to the right */
    for (i = xs + nx - s; i < xs + nx; i++) {
      for (j = 0; j < nc; j++) {
        cnt = 0;
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
        }
        if (dfill) {
          for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
        } else {
          for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
        }
        if (rank < size - 1) {
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
          }
        }
        if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
          /* periodic wrap: right neighbors live at the start of the domain
             (index formula relies on s <= 1, enforced by the PetscCheck above) */
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s - l - m + 2) * nc + ofill[k];
          }
        }
        PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        row++;
      }
    }
    PetscCall(PetscFree(cols));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1560 
1561 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da, Mat J)
1562 {
1563   PetscInt               xs, nx, i, i1, slot, gxs, gnx;
1564   PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1565   PetscInt               istart, iend;
1566   DMBoundaryType         bx;
1567   ISLocalToGlobalMapping ltog, mltog;
1568 
1569   PetscFunctionBegin;
1570   /*
1571          nc - number of components per grid point
1572          col - number of colors needed in one direction for single component problem
1573   */
1574   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1575   if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1576   col = 2 * s + 1;
1577 
1578   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1579   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1580 
1581   PetscCall(MatSetBlockSize(J, nc));
1582   PetscCall(MatSeqAIJSetPreallocation(J, col * nc, NULL));
1583   PetscCall(MatMPIAIJSetPreallocation(J, col * nc, NULL, col * nc, NULL));
1584 
1585   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1586   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1587   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1588 
1589   /*
1590     For each node in the grid: we get the neighbors in the local (on processor ordering
1591     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1592     PETSc ordering.
1593   */
1594   if (!da->prealloc_only) {
1595     PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
1596     for (i = xs; i < xs + nx; i++) {
1597       istart = PetscMax(-s, gxs - i);
1598       iend   = PetscMin(s, gxs + gnx - i - 1);
1599       slot   = i - gxs;
1600 
1601       cnt = 0;
1602       for (i1 = istart; i1 < iend + 1; i1++) {
1603         cols[cnt++] = nc * (slot + i1);
1604         for (l = 1; l < nc; l++) {
1605           cols[cnt] = 1 + cols[cnt - 1];
1606           cnt++;
1607         }
1608       }
1609       rows[0] = nc * (slot);
1610       for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1611       PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1612     }
1613     /* do not copy values to GPU since they are all zero and not yet needed there */
1614     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1615     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1616     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1617     if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1618     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1619     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1620     PetscCall(PetscFree2(rows, cols));
1621   }
1622   PetscFunctionReturn(PETSC_SUCCESS);
1623 }
1624 
1625 PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da, Mat J)
1626 {
1627   PetscInt               xs, nx, i, i1, slot, gxs, gnx;
1628   PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1629   PetscInt               istart, iend;
1630   DMBoundaryType         bx;
1631   ISLocalToGlobalMapping ltog, mltog;
1632 
1633   PetscFunctionBegin;
1634   /*
1635          nc - number of components per grid point
1636          col - number of colors needed in one direction for single component problem
1637   */
1638   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1639   col = 2 * s + 1;
1640 
1641   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1642   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1643 
1644   PetscCall(MatSetBlockSize(J, nc));
1645   PetscCall(MatSeqAIJSetTotalPreallocation(J, nx * nc * col * nc));
1646 
1647   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1648   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1649   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1650 
1651   /*
1652     For each node in the grid: we get the neighbors in the local (on processor ordering
1653     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1654     PETSc ordering.
1655   */
1656   if (!da->prealloc_only) {
1657     PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
1658     for (i = xs; i < xs + nx; i++) {
1659       istart = PetscMax(-s, gxs - i);
1660       iend   = PetscMin(s, gxs + gnx - i - 1);
1661       slot   = i - gxs;
1662 
1663       cnt = 0;
1664       for (i1 = istart; i1 < iend + 1; i1++) {
1665         cols[cnt++] = nc * (slot + i1);
1666         for (l = 1; l < nc; l++) {
1667           cols[cnt] = 1 + cols[cnt - 1];
1668           cnt++;
1669         }
1670       }
1671       rows[0] = nc * (slot);
1672       for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1673       PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1674     }
1675     /* do not copy values to GPU since they are all zero and not yet needed there */
1676     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1677     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1678     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1679     if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1680     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1681     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1682     PetscCall(PetscFree2(rows, cols));
1683   }
1684   PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1685   PetscFunctionReturn(PETSC_SUCCESS);
1686 }
1687 
/*
  Preallocates (and, unless da->prealloc_only, fills with zero blocks) the BAIJ Jacobian
  of a 2d DMDA.  Works on nc x nc blocks: each grid point is one block row, and the
  stencil (box or star) determines which neighboring blocks are present.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, ii, jj;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc1(col * col * nc * nc, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* for periodic boundaries the full stencil always applies; otherwise clip at the
       physical domain edges */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      /* slot = local (ghosted) index of grid point (i,j) = one block row */
      slot   = i - gxs + gnx * (j - gys);

      /* Find block columns in block row */
      cnt = 0;
      for (ii = istart; ii < iend + 1; ii++) {
        for (jj = jstart; jj < jend + 1; jj++) {
          if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
            cols[cnt++] = slot + ii + gnx * jj;
          }
        }
      }
      PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* zeroed scratch block values, large enough for the widest possible block row */
    PetscCall(PetscCalloc1(col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        slot   = i - gxs + gnx * (j - gys);
        cnt    = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
              cols[cnt++] = slot + ii + gnx * jj;
            }
          }
        }
        PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1779 
/*
  Preallocates (and, unless da->prealloc_only, fills with zero blocks) the BAIJ Jacobian
  of a 3d DMDA.  Works on nc x nc blocks: each grid point is one block row; the box
  stencil keeps every neighbor, the star stencil keeps only neighbors sharing at least
  two zero offsets (i.e. lying on a coordinate axis through the point).
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* cols holds block indices only, hence col^3 entries (no nc*nc factor) */
  PetscCall(PetscMalloc1(col * col * col, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* for periodic boundaries the full stencil always applies; otherwise clip at the
       physical domain edges */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        /* slot = local (ghosted) index of grid point (i,j,k) = one block row */
        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

        /* Find block columns in block row */
        cnt = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            for (kk = kstart; kk < kend + 1; kk++) {
              if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
              }
            }
          }
        }
        PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* zeroed scratch block values, large enough for the widest possible block row */
    PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
                }
              }
            }
          }
          PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1886 
1887 /*
1888   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
1889   identify in the local ordering with periodic domain.
1890 */
1891 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog, PetscInt *row, PetscInt *cnt, PetscInt col[])
1892 {
1893   PetscInt i, n;
1894 
1895   PetscFunctionBegin;
1896   PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, 1, row, row));
1897   PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, *cnt, col, col));
1898   for (i = 0, n = 0; i < *cnt; i++) {
1899     if (col[i] >= *row) col[n++] = col[i];
1900   }
1901   *cnt = n;
1902   PetscFunctionReturn(PETSC_SUCCESS);
1903 }
1904 
/*
  Preallocates (and, unless da->prealloc_only, fills with zero blocks) the symmetric
  SBAIJ Jacobian of a 2d DMDA.  Only the upper triangle is stored: each block row's
  stencil columns are mapped to the global ordering and filtered through
  L2GFilterUpperTriangular() before preallocation/insertion.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, ii, jj;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc1(col * col * nc * nc, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* for periodic boundaries the full stencil always applies; otherwise clip at the
       physical domain edges */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      /* slot = local (ghosted) block row index; overwritten with the global index
         by L2GFilterUpperTriangular below, and recomputed each iteration */
      slot   = i - gxs + gnx * (j - gys);

      /* Find block columns in block row */
      cnt = 0;
      for (ii = istart; ii < iend + 1; ii++) {
        for (jj = jstart; jj < jend + 1; jj++) {
          if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
        }
      }
      PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
      PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* zeroed scratch block values, large enough for the widest possible block row */
    PetscCall(PetscCalloc1(col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        slot   = i - gxs + gnx * (j - gys);

        /* Find block columns in block row */
        cnt = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
          }
        }
        PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
        /* indices are already global after filtering, so use MatSetValuesBlocked
           (not the Local variant) */
        PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1996 
/*
  Preallocates (and, unless da->prealloc_only, fills with zero blocks) the symmetric
  SBAIJ Jacobian of a 3d DMDA.  Only the upper triangle is stored: each block row's
  stencil columns are mapped to the global ordering and filtered through
  L2GFilterUpperTriangular() before preallocation/insertion.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the matrix */
  PetscCall(PetscMalloc1(col * col * col, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* for periodic boundaries the full stencil always applies; otherwise clip at the
       physical domain edges */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        /* slot = local (ghosted) block row index; overwritten with the global index
           by L2GFilterUpperTriangular below, and recomputed each iteration */
        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

        /* Find block columns in block row */
        cnt = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            for (kk = kstart; kk < kend + 1; kk++) {
              if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
            }
          }
        }
        PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
        PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* zeroed scratch block values, large enough for the widest possible block row */
    PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
              }
            }
          }
          PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
          /* indices are already global after filtering, so use MatSetValuesBlocked
             (not the Local variant) */
          PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2102 
/*
  DMCreateMatrix_DA_3d_MPIAIJ_Fill - Preallocates (and, unless da->prealloc_only is set,
  fills with explicit zeros) an AIJ matrix whose nonzero structure matches the stencil
  of a 3d DMDA, restricted by the user-supplied component coupling ("fill") patterns:
  dd->dfill describes which components couple at a grid point itself and dd->ofill which
  components couple to neighboring grid points (both stored in the modified CSR layout
  produced by DMDASetBlockFills_Private: entries pat[l] .. pat[l+1]-1 list the column
  components coupled to row component l).

  Logically collective.

  Input Parameters:
+ da - the DMDA (3d) describing the grid, stencil width/type, and fill patterns
- J  - the matrix to set preallocation (and optionally zero values) on

  Notes:
  The routine makes two structurally identical sweeps over the locally owned grid
  points: the first only counts column entries per row to compute the preallocation,
  the second inserts explicit zeros so the nonzero pattern is locked in place
  (MAT_NEW_NONZERO_LOCATION_ERR is then enabled to catch later out-of-pattern inserts).
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
  DM_DA                 *dd = (DM_DA *)da->data;
  PetscInt               ifill_col, *dfill = dd->dfill, *ofill = dd->ofill;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;

  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;

  /* owned corners/extents (xs..) and ghosted corners/extents (gxs..) of the local grid patch */
  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* workspace sized for the worst case: a full box stencil (col^3 points) coupling all nc components */
  PetscCall(PetscMalloc1(col * col * col * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  /* macro: allocates and zeroes the per-row counters dnz (diagonal block) and onz (off-diagonal block) */
  MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);

  PetscCall(MatSetBlockSize(J, nc));
  /* first sweep: count the column entries contributed to each local row */
  for (i = xs; i < xs + nx; i++) {
    /* stencil extent in x: full [-s,s] when periodic, otherwise clipped at the physical boundary */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        /* local (ghosted) natural index of grid point (i,j,k) */
        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

        /* one matrix row per component l at this grid point */
        for (l = 0; l < nc; l++) {
          cnt = 0;
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if (ii || jj || kk) {
                  /* neighbor grid point: couple only the components listed in ofill for row component l */
                  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                    for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  }
                } else {
                  /* the grid point itself: use dfill if given, otherwise couple to all nc components */
                  if (dfill) {
                    for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  } else {
                    for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  }
                }
              }
            }
          }
          row    = l + nc * (slot);
          /* track the widest row so the zero-values buffer below can be sized once */
          maxcnt = PetscMax(maxcnt, cnt);
          /* removedups collapses columns counted twice due to periodic wrap-around in skinny dimensions (see above) */
          if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
          else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
        }
      }
    }
  }
  /* both Seq and MPI variants are set; only the one matching J's actual type takes effect */
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* second sweep: insert explicit zeros at exactly the counted locations to fix the nonzero pattern */
    PetscCall(PetscCalloc1(maxcnt, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          for (l = 0; l < nc; l++) {
            cnt = 0;
            /* identical column generation to the counting sweep above */
            for (ii = istart; ii < iend + 1; ii++) {
              for (jj = jstart; jj < jend + 1; jj++) {
                for (kk = kstart; kk < kend + 1; kk++) {
                  if (ii || jj || kk) {
                    if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                      for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    }
                  } else {
                    if (dfill) {
                      for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    } else {
                      for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    }
                  }
                }
              }
            }
            row = l + nc * (slot);
            PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, values, INSERT_VALUES));
          }
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    /* lock the pattern: later insertions outside the preallocated structure become errors */
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2243