xref: /petsc/src/dm/impls/da/fdda.c (revision 53df731d4a58a13d32dc697c3d2b74491914e802)
1 #include <petsc/private/dmdaimpl.h> /*I      "petscdmda.h"     I*/
2 #include <petscmat.h>
3 #include <petscbt.h>
4 
5 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM, ISColoringType, ISColoring *);
6 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM, ISColoringType, ISColoring *);
7 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM, ISColoringType, ISColoring *);
8 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM, ISColoringType, ISColoring *);
9 
10 /*
11    For ghost i that may be negative or greater than the upper bound this
12   maps it into the 0:m-1 range using periodicity
13 */
14 #define SetInRange(i, m) ((i < 0) ? m + i : ((i >= m) ? i - m : i))
15 
16 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill, PetscInt w, PetscInt **rfill)
17 {
18   PetscInt i, j, nz, *fill;
19 
20   PetscFunctionBegin;
21   if (!dfill) PetscFunctionReturn(PETSC_SUCCESS);
22 
23   /* count number nonzeros */
24   nz = 0;
25   for (i = 0; i < w; i++) {
26     for (j = 0; j < w; j++) {
27       if (dfill[w * i + j]) nz++;
28     }
29   }
30   PetscCall(PetscMalloc1(nz + w + 1, &fill));
31   /* construct modified CSR storage of nonzero structure */
32   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
33    so fill[1] - fill[0] gives number of nonzeros in first row etc */
34   nz = w + 1;
35   for (i = 0; i < w; i++) {
36     fill[i] = nz;
37     for (j = 0; j < w; j++) {
38       if (dfill[w * i + j]) {
39         fill[nz] = j;
40         nz++;
41       }
42     }
43   }
44   fill[w] = nz;
45 
46   *rfill = fill;
47   PetscFunctionReturn(PETSC_SUCCESS);
48 }
49 
50 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse, PetscInt w, PetscInt **rfill)
51 {
52   PetscInt nz;
53 
54   PetscFunctionBegin;
55   if (!dfillsparse) PetscFunctionReturn(PETSC_SUCCESS);
56 
57   /* Determine number of non-zeros */
58   nz = (dfillsparse[w] - w - 1);
59 
60   /* Allocate space for our copy of the given sparse matrix representation. */
61   PetscCall(PetscMalloc1(nz + w + 1, rfill));
62   PetscCall(PetscArraycpy(*rfill, dfillsparse, nz + w + 1));
63   PetscFunctionReturn(PETSC_SUCCESS);
64 }
65 
66 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
67 {
68   PetscInt i, k, cnt = 1;
69 
70   PetscFunctionBegin;
71 
72   /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of
73    columns to the left with any nonzeros in them plus 1 */
74   PetscCall(PetscCalloc1(dd->w, &dd->ofillcols));
75   for (i = 0; i < dd->w; i++) {
76     for (k = dd->ofill[i]; k < dd->ofill[i + 1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
77   }
78   for (i = 0; i < dd->w; i++) {
79     if (dd->ofillcols[i]) dd->ofillcols[i] = cnt++;
80   }
81   PetscFunctionReturn(PETSC_SUCCESS);
82 }
83 
84 /*@
85   DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
86   of the matrix returned by `DMCreateMatrix()`.
87 
88   Logically Collective
89 
90   Input Parameters:
91 + da    - the distributed array
92 . dfill - the fill pattern in the diagonal block (may be `NULL`, means use dense block)
93 - ofill - the fill pattern in the off-diagonal blocks
94 
95   Level: developer
96 
97   Notes:
98   This only makes sense when you are doing multicomponent problems but using the
99   `MATMPIAIJ` matrix format
100 
101   The format for `dfill` and `ofill` is a 2 dimensional dof by dof matrix with 1 entries
102   representing coupling and 0 entries for missing coupling. For example
103 .vb
104             dfill[9] = {1, 0, 0,
105                         1, 1, 0,
106                         0, 1, 1}
107 .ve
108   means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
109   itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
110   diagonal block).
111 
112   `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns then
113   can be represented in the `dfill`, `ofill` format
114 
115   Contributed by\:
116   Glenn Hammond
117 
118 .seealso: [](sec_struct), `DM`, `DMDA`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`, `DMDASetBlockFillsSparse()`
119 @*/
120 PetscErrorCode DMDASetBlockFills(DM da, const PetscInt *dfill, const PetscInt *ofill)
121 {
122   DM_DA *dd = (DM_DA *)da->data;
123 
124   PetscFunctionBegin;
125   /* save the given dfill and ofill information */
126   PetscCall(DMDASetBlockFills_Private(dfill, dd->w, &dd->dfill));
127   PetscCall(DMDASetBlockFills_Private(ofill, dd->w, &dd->ofill));
128 
129   /* count nonzeros in ofill columns */
130   PetscCall(DMDASetBlockFills_Private2(dd));
131   PetscFunctionReturn(PETSC_SUCCESS);
132 }
133 
134 /*@
135   DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
136   of the matrix returned by `DMCreateMatrix()`, using sparse representations
137   of fill patterns.
138 
139   Logically Collective
140 
141   Input Parameters:
142 + da          - the distributed array
143 . dfillsparse - the sparse fill pattern in the diagonal block (may be `NULL`, means use dense block)
144 - ofillsparse - the sparse fill pattern in the off-diagonal blocks
145 
146   Level: developer
147 
148   Notes:
149   This only makes sense when you are doing multicomponent problems but using the
150   `MATMPIAIJ` matrix format
151 
152   The format for `dfill` and `ofill` is a sparse representation of a
153   dof-by-dof matrix with 1 entries representing coupling and 0 entries
154   for missing coupling.  The sparse representation is a 1 dimensional
155   array of length nz + dof + 1, where nz is the number of non-zeros in
156   the matrix.  The first dof entries in the array give the
157   starting array indices of each row's items in the rest of the array,
158   the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
159   and the remaining nz items give the column indices of each of
160   the 1s within the logical 2D matrix.  Each row's items within
161   the array are the column indices of the 1s within that row
162   of the 2D matrix.  PETSc developers may recognize that this is the
163   same format as that computed by the `DMDASetBlockFills_Private()`
164   function from a dense 2D matrix representation.
165 
166   `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns then
167   can be represented in the `dfill`, `ofill` format
168 
169   Contributed by\:
170   Philip C. Roth
171 
172 .seealso: [](sec_struct), `DM`, `DMDA`, `DMDASetBlockFills()`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
173 @*/
174 PetscErrorCode DMDASetBlockFillsSparse(DM da, const PetscInt *dfillsparse, const PetscInt *ofillsparse)
175 {
176   DM_DA *dd = (DM_DA *)da->data;
177 
178   PetscFunctionBegin;
179   /* save the given dfill and ofill information */
180   PetscCall(DMDASetBlockFillsSparse_Private(dfillsparse, dd->w, &dd->dfill));
181   PetscCall(DMDASetBlockFillsSparse_Private(ofillsparse, dd->w, &dd->ofill));
182 
183   /* count nonzeros in ofill columns */
184   PetscCall(DMDASetBlockFills_Private2(dd));
185   PetscFunctionReturn(PETSC_SUCCESS);
186 }
187 
188 PetscErrorCode DMCreateColoring_DA(DM da, ISColoringType ctype, ISColoring *coloring)
189 {
190   PetscInt       dim, m, n, p, nc;
191   DMBoundaryType bx, by, bz;
192   MPI_Comm       comm;
193   PetscMPIInt    size;
194   PetscBool      isBAIJ;
195   DM_DA         *dd = (DM_DA *)da->data;
196 
197   PetscFunctionBegin;
198   /*
199                                   m
200           ------------------------------------------------------
201          |                                                     |
202          |                                                     |
203          |               ----------------------                |
204          |               |                    |                |
205       n  |           yn  |                    |                |
206          |               |                    |                |
207          |               .---------------------                |
208          |             (xs,ys)     xn                          |
209          |            .                                        |
210          |         (gxs,gys)                                   |
211          |                                                     |
212           -----------------------------------------------------
213   */
214 
215   /*
216          nc - number of components per grid point
217          col - number of colors needed in one direction for single component problem
218 
219   */
220   PetscCall(DMDAGetInfo(da, &dim, NULL, NULL, NULL, &m, &n, &p, &nc, NULL, &bx, &by, &bz, NULL));
221 
222   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
223   PetscCallMPI(MPI_Comm_size(comm, &size));
224   if (ctype == IS_COLORING_LOCAL) {
225     if (size == 1) {
226       ctype = IS_COLORING_GLOBAL;
227     } else {
228       PetscCheck((dim == 1) || !((m == 1 && bx == DM_BOUNDARY_PERIODIC) || (n == 1 && by == DM_BOUNDARY_PERIODIC) || (p == 1 && bz == DM_BOUNDARY_PERIODIC)), PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain on the same process");
229     }
230   }
231 
232   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
233      matrices is for the blocks, not the individual matrix elements  */
234   PetscCall(PetscStrbeginswith(da->mattype, MATBAIJ, &isBAIJ));
235   if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATMPIBAIJ, &isBAIJ));
236   if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATSEQBAIJ, &isBAIJ));
237   if (isBAIJ) {
238     dd->w  = 1;
239     dd->xs = dd->xs / nc;
240     dd->xe = dd->xe / nc;
241     dd->Xs = dd->Xs / nc;
242     dd->Xe = dd->Xe / nc;
243   }
244 
245   /*
246      We do not provide a getcoloring function in the DMDA operations because
247    the basic DMDA does not know about matrices. We think of DMDA as being
248    more low-level then matrices.
249   */
250   if (dim == 1) PetscCall(DMCreateColoring_DA_1d_MPIAIJ(da, ctype, coloring));
251   else if (dim == 2) PetscCall(DMCreateColoring_DA_2d_MPIAIJ(da, ctype, coloring));
252   else if (dim == 3) PetscCall(DMCreateColoring_DA_3d_MPIAIJ(da, ctype, coloring));
253   else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not done for %" PetscInt_FMT " dimension, send us mail petsc-maint@mcs.anl.gov for code", dim);
254   if (isBAIJ) {
255     dd->w  = nc;
256     dd->xs = dd->xs * nc;
257     dd->xe = dd->xe * nc;
258     dd->Xs = dd->Xs * nc;
259     dd->Xe = dd->Xe * nc;
260   }
261   PetscFunctionReturn(PETSC_SUCCESS);
262 }
263 
264 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
265 {
266   PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, M, N, dim, s, k, nc, col;
267   PetscInt         ncolors = 0;
268   MPI_Comm         comm;
269   DMBoundaryType   bx, by;
270   DMDAStencilType  st;
271   ISColoringValue *colors;
272   DM_DA           *dd = (DM_DA *)da->data;
273 
274   PetscFunctionBegin;
275   /*
276          nc - number of components per grid point
277          col - number of colors needed in one direction for single component problem
278 
279   */
280   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
281   col = 2 * s + 1;
282   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
283   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
284   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
285 
286   /* special case as taught to us by Paul Hovland */
287   if (st == DMDA_STENCIL_STAR && s == 1) {
288     PetscCall(DMCreateColoring_DA_2d_5pt_MPIAIJ(da, ctype, coloring));
289   } else {
290     if (ctype == IS_COLORING_GLOBAL) {
291       if (!dd->localcoloring) {
292         PetscCall(PetscMalloc1(nc * nx * ny, &colors));
293         ii = 0;
294         for (j = ys; j < ys + ny; j++) {
295           for (i = xs; i < xs + nx; i++) {
296             for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((i % col) + col * (j % col));
297           }
298         }
299         ncolors = nc + nc * (col - 1 + col * (col - 1));
300         PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
301       }
302       *coloring = dd->localcoloring;
303     } else if (ctype == IS_COLORING_LOCAL) {
304       if (!dd->ghostedcoloring) {
305         PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
306         ii = 0;
307         for (j = gys; j < gys + gny; j++) {
308           for (i = gxs; i < gxs + gnx; i++) {
309             for (k = 0; k < nc; k++) {
310               /* the complicated stuff is to handle periodic boundaries */
311               colors[ii++] = k + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col));
312             }
313           }
314         }
315         ncolors = nc + nc * (col - 1 + col * (col - 1));
316         PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
317         /* PetscIntView(ncolors,(PetscInt*)colors,0); */
318 
319         PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
320       }
321       *coloring = dd->ghostedcoloring;
322     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
323   }
324   PetscCall(ISColoringReference(*coloring));
325   PetscFunctionReturn(PETSC_SUCCESS);
326 }
327 
328 PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
329 {
330   PetscInt         xs, ys, nx, ny, i, j, gxs, gys, gnx, gny, m, n, p, dim, s, k, nc, col, zs, gzs, ii, l, nz, gnz, M, N, P;
331   PetscInt         ncolors;
332   MPI_Comm         comm;
333   DMBoundaryType   bx, by, bz;
334   DMDAStencilType  st;
335   ISColoringValue *colors;
336   DM_DA           *dd = (DM_DA *)da->data;
337 
338   PetscFunctionBegin;
339   /*
340          nc - number of components per grid point
341          col - number of colors needed in one direction for single component problem
342   */
343   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
344   col = 2 * s + 1;
345   PetscCheck(bx != DM_BOUNDARY_PERIODIC || (m % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in X is divisible\n\
346                  by 2*stencil_width + 1\n");
347   PetscCheck(by != DM_BOUNDARY_PERIODIC || (n % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Y is divisible\n\
348                  by 2*stencil_width + 1\n");
349   PetscCheck(bz != DM_BOUNDARY_PERIODIC || (p % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Z is divisible\n\
350                  by 2*stencil_width + 1\n");
351 
352   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
353   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
354   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
355 
356   /* create the coloring */
357   if (ctype == IS_COLORING_GLOBAL) {
358     if (!dd->localcoloring) {
359       PetscCall(PetscMalloc1(nc * nx * ny * nz, &colors));
360       ii = 0;
361       for (k = zs; k < zs + nz; k++) {
362         for (j = ys; j < ys + ny; j++) {
363           for (i = xs; i < xs + nx; i++) {
364             for (l = 0; l < nc; l++) colors[ii++] = l + nc * ((i % col) + col * (j % col) + col * col * (k % col));
365           }
366         }
367       }
368       ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
369       PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny * nz, colors, PETSC_OWN_POINTER, &dd->localcoloring));
370     }
371     *coloring = dd->localcoloring;
372   } else if (ctype == IS_COLORING_LOCAL) {
373     if (!dd->ghostedcoloring) {
374       PetscCall(PetscMalloc1(nc * gnx * gny * gnz, &colors));
375       ii = 0;
376       for (k = gzs; k < gzs + gnz; k++) {
377         for (j = gys; j < gys + gny; j++) {
378           for (i = gxs; i < gxs + gnx; i++) {
379             for (l = 0; l < nc; l++) {
380               /* the complicated stuff is to handle periodic boundaries */
381               colors[ii++] = l + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col) + col * col * (SetInRange(k, p) % col));
382             }
383           }
384         }
385       }
386       ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
387       PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny * gnz, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
388       PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
389     }
390     *coloring = dd->ghostedcoloring;
391   } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
392   PetscCall(ISColoringReference(*coloring));
393   PetscFunctionReturn(PETSC_SUCCESS);
394 }
395 
396 PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
397 {
398   PetscInt         xs, nx, i, i1, gxs, gnx, l, m, M, dim, s, nc, col;
399   PetscInt         ncolors;
400   MPI_Comm         comm;
401   DMBoundaryType   bx;
402   ISColoringValue *colors;
403   DM_DA           *dd = (DM_DA *)da->data;
404 
405   PetscFunctionBegin;
406   /*
407          nc - number of components per grid point
408          col - number of colors needed in one direction for single component problem
409   */
410   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, &M, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
411   col = 2 * s + 1;
412   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
413   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
414   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
415 
416   /* create the coloring */
417   if (ctype == IS_COLORING_GLOBAL) {
418     if (!dd->localcoloring) {
419       PetscCall(PetscMalloc1(nc * nx, &colors));
420       if (dd->ofillcols) {
421         PetscInt tc = 0;
422         for (i = 0; i < nc; i++) tc += (PetscInt)(dd->ofillcols[i] > 0);
423         i1 = 0;
424         for (i = xs; i < xs + nx; i++) {
425           for (l = 0; l < nc; l++) {
426             if (dd->ofillcols[l] && (i % col)) {
427               colors[i1++] = nc - 1 + tc * ((i % col) - 1) + dd->ofillcols[l];
428             } else {
429               colors[i1++] = l;
430             }
431           }
432         }
433         ncolors = nc + 2 * s * tc;
434       } else {
435         i1 = 0;
436         for (i = xs; i < xs + nx; i++) {
437           for (l = 0; l < nc; l++) colors[i1++] = l + nc * (i % col);
438         }
439         ncolors = nc + nc * (col - 1);
440       }
441       PetscCall(ISColoringCreate(comm, ncolors, nc * nx, colors, PETSC_OWN_POINTER, &dd->localcoloring));
442     }
443     *coloring = dd->localcoloring;
444   } else if (ctype == IS_COLORING_LOCAL) {
445     if (!dd->ghostedcoloring) {
446       PetscCall(PetscMalloc1(nc * gnx, &colors));
447       i1 = 0;
448       for (i = gxs; i < gxs + gnx; i++) {
449         for (l = 0; l < nc; l++) {
450           /* the complicated stuff is to handle periodic boundaries */
451           colors[i1++] = l + nc * (SetInRange(i, m) % col);
452         }
453       }
454       ncolors = nc + nc * (col - 1);
455       PetscCall(ISColoringCreate(comm, ncolors, nc * gnx, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
456       PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
457     }
458     *coloring = dd->ghostedcoloring;
459   } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
460   PetscCall(ISColoringReference(*coloring));
461   PetscFunctionReturn(PETSC_SUCCESS);
462 }
463 
464 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
465 {
466   PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, dim, s, k, nc;
467   PetscInt         ncolors;
468   MPI_Comm         comm;
469   DMBoundaryType   bx, by;
470   ISColoringValue *colors;
471   DM_DA           *dd = (DM_DA *)da->data;
472 
473   PetscFunctionBegin;
474   /*
475          nc - number of components per grid point
476          col - number of colors needed in one direction for single component problem
477   */
478   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, NULL));
479   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
480   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
481   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
482   /* create the coloring */
483   if (ctype == IS_COLORING_GLOBAL) {
484     if (!dd->localcoloring) {
485       PetscCall(PetscMalloc1(nc * nx * ny, &colors));
486       ii = 0;
487       for (j = ys; j < ys + ny; j++) {
488         for (i = xs; i < xs + nx; i++) {
489           for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * j + i) % 5);
490         }
491       }
492       ncolors = 5 * nc;
493       PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
494     }
495     *coloring = dd->localcoloring;
496   } else if (ctype == IS_COLORING_LOCAL) {
497     if (!dd->ghostedcoloring) {
498       PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
499       ii = 0;
500       for (j = gys; j < gys + gny; j++) {
501         for (i = gxs; i < gxs + gnx; i++) {
502           for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * SetInRange(j, n) + SetInRange(i, m)) % 5);
503         }
504       }
505       ncolors = 5 * nc;
506       PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
507       PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
508     }
509     *coloring = dd->ghostedcoloring;
510   } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
511   PetscFunctionReturn(PETSC_SUCCESS);
512 }
513 
514 /* =========================================================================== */
515 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM, Mat);
516 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM, Mat);
517 extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM, Mat);
518 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM, Mat);
519 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM, Mat);
520 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM, Mat);
521 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM, Mat);
522 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM, Mat);
523 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM, Mat);
524 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM, Mat);
525 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM, Mat);
526 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM, Mat);
527 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM, Mat);
528 extern PetscErrorCode DMCreateMatrix_DA_IS(DM, Mat);
529 
530 static PetscErrorCode MatView_MPI_DA(Mat A, PetscViewer viewer)
531 {
532   DM                da;
533   const char       *prefix;
534   Mat               AA, Anatural;
535   AO                ao;
536   PetscInt          rstart, rend, *petsc, i;
537   IS                is;
538   MPI_Comm          comm;
539   PetscViewerFormat format;
540   PetscBool         flag;
541 
542   PetscFunctionBegin;
543   /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
544   PetscCall(PetscViewerGetFormat(viewer, &format));
545   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(PETSC_SUCCESS);
546 
547   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
548   PetscCall(MatGetDM(A, &da));
549   PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");
550 
551   PetscCall(DMDAGetAO(da, &ao));
552   PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
553   PetscCall(PetscMalloc1(rend - rstart, &petsc));
554   for (i = rstart; i < rend; i++) petsc[i - rstart] = i;
555   PetscCall(AOApplicationToPetsc(ao, rend - rstart, petsc));
556   PetscCall(ISCreateGeneral(comm, rend - rstart, petsc, PETSC_OWN_POINTER, &is));
557 
558   /* call viewer on natural ordering */
559   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPISELL, &flag));
560   if (flag) {
561     PetscCall(MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &AA));
562     A = AA;
563   }
564   PetscCall(MatCreateSubMatrix(A, is, is, MAT_INITIAL_MATRIX, &Anatural));
565   PetscCall(ISDestroy(&is));
566   PetscCall(PetscObjectGetOptionsPrefix((PetscObject)A, &prefix));
567   PetscCall(PetscObjectSetOptionsPrefix((PetscObject)Anatural, prefix));
568   PetscCall(PetscObjectSetName((PetscObject)Anatural, ((PetscObject)A)->name));
569   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
570   PetscCall(MatView(Anatural, viewer));
571   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
572   PetscCall(MatDestroy(&Anatural));
573   if (flag) PetscCall(MatDestroy(&AA));
574   PetscFunctionReturn(PETSC_SUCCESS);
575 }
576 
577 static PetscErrorCode MatLoad_MPI_DA(Mat A, PetscViewer viewer)
578 {
579   DM       da;
580   Mat      Anatural, Aapp;
581   AO       ao;
582   PetscInt rstart, rend, *app, i, m, n, M, N;
583   IS       is;
584   MPI_Comm comm;
585 
586   PetscFunctionBegin;
587   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
588   PetscCall(MatGetDM(A, &da));
589   PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");
590 
591   /* Load the matrix in natural ordering */
592   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Anatural));
593   PetscCall(MatSetType(Anatural, ((PetscObject)A)->type_name));
594   PetscCall(MatGetSize(A, &M, &N));
595   PetscCall(MatGetLocalSize(A, &m, &n));
596   PetscCall(MatSetSizes(Anatural, m, n, M, N));
597   PetscCall(MatLoad(Anatural, viewer));
598 
599   /* Map natural ordering to application ordering and create IS */
600   PetscCall(DMDAGetAO(da, &ao));
601   PetscCall(MatGetOwnershipRange(Anatural, &rstart, &rend));
602   PetscCall(PetscMalloc1(rend - rstart, &app));
603   for (i = rstart; i < rend; i++) app[i - rstart] = i;
604   PetscCall(AOPetscToApplication(ao, rend - rstart, app));
605   PetscCall(ISCreateGeneral(comm, rend - rstart, app, PETSC_OWN_POINTER, &is));
606 
607   /* Do permutation and replace header */
608   PetscCall(MatCreateSubMatrix(Anatural, is, is, MAT_INITIAL_MATRIX, &Aapp));
609   PetscCall(MatHeaderReplace(A, &Aapp));
610   PetscCall(ISDestroy(&is));
611   PetscCall(MatDestroy(&Anatural));
612   PetscFunctionReturn(PETSC_SUCCESS);
613 }
614 
615 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
616 {
617   PetscInt    dim, dof, nx, ny, nz, dims[3], starts[3], M, N, P;
618   Mat         A;
619   MPI_Comm    comm;
620   MatType     Atype;
621   MatType     mtype;
622   PetscMPIInt size;
623   DM_DA      *dd    = (DM_DA *)da->data;
624   void (*aij)(void) = NULL, (*baij)(void) = NULL, (*sbaij)(void) = NULL, (*sell)(void) = NULL, (*is)(void) = NULL;
625 
626   PetscFunctionBegin;
627   PetscCall(MatInitializePackage());
628   mtype = da->mattype;
629 
630   /*
631                                   m
632           ------------------------------------------------------
633          |                                                     |
634          |                                                     |
635          |               ----------------------                |
636          |               |                    |                |
637       n  |           ny  |                    |                |
638          |               |                    |                |
639          |               .---------------------                |
640          |             (xs,ys)     nx                          |
641          |            .                                        |
642          |         (gxs,gys)                                   |
643          |                                                     |
644           -----------------------------------------------------
645   */
646 
647   /*
648          nc - number of components per grid point
649          col - number of colors needed in one direction for single component problem
650   */
651   M   = dd->M;
652   N   = dd->N;
653   P   = dd->P;
654   dim = da->dim;
655   dof = dd->w;
656   /* PetscCall(DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); */
657   PetscCall(DMDAGetCorners(da, NULL, NULL, NULL, &nx, &ny, &nz));
658   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
659   PetscCall(MatCreate(comm, &A));
660   PetscCall(MatSetSizes(A, dof * nx * ny * nz, dof * nx * ny * nz, dof * M * N * P, dof * M * N * P));
661   PetscCall(MatSetType(A, mtype));
662   PetscCall(MatSetFromOptions(A));
663   if (dof * nx * ny * nz < da->bind_below) {
664     PetscCall(MatSetBindingPropagates(A, PETSC_TRUE));
665     PetscCall(MatBindToCPU(A, PETSC_TRUE));
666   }
667   PetscCall(MatSetDM(A, da));
668   if (da->structure_only) PetscCall(MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE));
669   PetscCall(MatGetType(A, &Atype));
670   /*
671      We do not provide a getmatrix function in the DMDA operations because
672    the basic DMDA does not know about matrices. We think of DMDA as being more
673    more low-level than matrices. This is kind of cheating but, cause sometimes
674    we think of DMDA has higher level than matrices.
675 
676      We could switch based on Atype (or mtype), but we do not since the
677    specialized setting routines depend only on the particular preallocation
678    details of the matrix, not the type itself.
679   */
680   if (!da->prealloc_skip) { // Flag is likely set when user intends to use MatSetPreallocationCOO()
681     PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJSetPreallocation_C", &aij));
682     if (!aij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJSetPreallocation_C", &aij));
683     if (!aij) {
684       PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIBAIJSetPreallocation_C", &baij));
685       if (!baij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", &baij));
686       if (!baij) {
687         PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISBAIJSetPreallocation_C", &sbaij));
688         if (!sbaij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSBAIJSetPreallocation_C", &sbaij));
689         if (!sbaij) {
690           PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", &sell));
691           if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSELLSetPreallocation_C", &sell));
692         }
693         if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatISSetPreallocation_C", &is));
694       }
695     }
696   }
697   if (aij) {
698     if (dim == 1) {
699       if (dd->ofill) {
700         PetscCall(DMCreateMatrix_DA_1d_MPIAIJ_Fill(da, A));
701       } else {
702         DMBoundaryType bx;
703         PetscMPIInt    size;
704         PetscCall(DMDAGetInfo(da, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &bx, NULL, NULL, NULL));
705         PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));
706         if (size == 1 && bx == DM_BOUNDARY_NONE) {
707           PetscCall(DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da, A));
708         } else {
709           PetscCall(DMCreateMatrix_DA_1d_MPIAIJ(da, A));
710         }
711       }
712     } else if (dim == 2) {
713       if (dd->ofill) {
714         PetscCall(DMCreateMatrix_DA_2d_MPIAIJ_Fill(da, A));
715       } else {
716         PetscCall(DMCreateMatrix_DA_2d_MPIAIJ(da, A));
717       }
718     } else if (dim == 3) {
719       if (dd->ofill) {
720         PetscCall(DMCreateMatrix_DA_3d_MPIAIJ_Fill(da, A));
721       } else {
722         PetscCall(DMCreateMatrix_DA_3d_MPIAIJ(da, A));
723       }
724     }
725   } else if (baij) {
726     if (dim == 2) {
727       PetscCall(DMCreateMatrix_DA_2d_MPIBAIJ(da, A));
728     } else if (dim == 3) {
729       PetscCall(DMCreateMatrix_DA_3d_MPIBAIJ(da, A));
730     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
731   } else if (sbaij) {
732     if (dim == 2) {
733       PetscCall(DMCreateMatrix_DA_2d_MPISBAIJ(da, A));
734     } else if (dim == 3) {
735       PetscCall(DMCreateMatrix_DA_3d_MPISBAIJ(da, A));
736     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
737   } else if (sell) {
738     if (dim == 2) {
739       PetscCall(DMCreateMatrix_DA_2d_MPISELL(da, A));
740     } else if (dim == 3) {
741       PetscCall(DMCreateMatrix_DA_3d_MPISELL(da, A));
742     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
743   } else if (is) {
744     PetscCall(DMCreateMatrix_DA_IS(da, A));
745   } else { // unknown type or da->prealloc_skip so structural information may be needed, but no prealloc
746     ISLocalToGlobalMapping ltog;
747 
748     PetscCall(MatSetBlockSize(A, dof));
749     PetscCall(MatSetUp(A));
750     PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
751     PetscCall(MatSetLocalToGlobalMapping(A, ltog, ltog));
752   }
753   PetscCall(DMDAGetGhostCorners(da, &starts[0], &starts[1], &starts[2], &dims[0], &dims[1], &dims[2]));
754   PetscCall(MatSetStencil(A, dim, dims, starts, dof));
755   PetscCall(MatSetDM(A, da));
756   PetscCallMPI(MPI_Comm_size(comm, &size));
757   if (size > 1) {
758     /* change viewer to display matrix in natural ordering */
759     PetscCall(MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA));
760     PetscCall(MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA));
761   }
762   *J = A;
763   PetscFunctionReturn(PETSC_SUCCESS);
764 }
765 
766 PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]);
767 
768 PetscErrorCode DMCreateMatrix_DA_IS(DM dm, Mat J)
769 {
770   DM_DA                 *da = (DM_DA *)dm->data;
771   Mat                    lJ, P;
772   ISLocalToGlobalMapping ltog;
773   IS                     is;
774   PetscBT                bt;
775   const PetscInt        *e_loc, *idx;
776   PetscInt               i, nel, nen, nv, dof, *gidx, n, N;
777 
778   /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
779      We need to filter out the local indices that are not represented through the DMDAGetElements decomposition */
780   PetscFunctionBegin;
781   dof = da->w;
782   PetscCall(MatSetBlockSize(J, dof));
783   PetscCall(DMGetLocalToGlobalMapping(dm, &ltog));
784 
785   /* flag local elements indices in local DMDA numbering */
786   PetscCall(ISLocalToGlobalMappingGetSize(ltog, &nv));
787   PetscCall(PetscBTCreate(nv / dof, &bt));
788   PetscCall(DMDAGetElements(dm, &nel, &nen, &e_loc)); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
789   for (i = 0; i < nel * nen; i++) PetscCall(PetscBTSet(bt, e_loc[i]));
790 
791   /* filter out (set to -1) the global indices not used by the local elements */
792   PetscCall(PetscMalloc1(nv / dof, &gidx));
793   PetscCall(ISLocalToGlobalMappingGetBlockIndices(ltog, &idx));
794   PetscCall(PetscArraycpy(gidx, idx, nv / dof));
795   PetscCall(ISLocalToGlobalMappingRestoreBlockIndices(ltog, &idx));
796   for (i = 0; i < nv / dof; i++)
797     if (!PetscBTLookup(bt, i)) gidx[i] = -1;
798   PetscCall(PetscBTDestroy(&bt));
799   PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)dm), dof, nv / dof, gidx, PETSC_OWN_POINTER, &is));
800   PetscCall(ISLocalToGlobalMappingCreateIS(is, &ltog));
801   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
802   PetscCall(ISLocalToGlobalMappingDestroy(&ltog));
803   PetscCall(ISDestroy(&is));
804 
805   /* Preallocation */
806   if (dm->prealloc_skip) {
807     PetscCall(MatSetUp(J));
808   } else {
809     PetscCall(MatISGetLocalMat(J, &lJ));
810     PetscCall(MatGetLocalToGlobalMapping(lJ, &ltog, NULL));
811     PetscCall(MatCreate(PetscObjectComm((PetscObject)lJ), &P));
812     PetscCall(MatSetType(P, MATPREALLOCATOR));
813     PetscCall(MatSetLocalToGlobalMapping(P, ltog, ltog));
814     PetscCall(MatGetSize(lJ, &N, NULL));
815     PetscCall(MatGetLocalSize(lJ, &n, NULL));
816     PetscCall(MatSetSizes(P, n, n, N, N));
817     PetscCall(MatSetBlockSize(P, dof));
818     PetscCall(MatSetUp(P));
819     for (i = 0; i < nel; i++) PetscCall(MatSetValuesBlockedLocal(P, nen, e_loc + i * nen, nen, e_loc + i * nen, NULL, INSERT_VALUES));
820     PetscCall(MatPreallocatorPreallocate(P, (PetscBool)!da->prealloc_only, lJ));
821     PetscCall(MatISRestoreLocalMat(J, &lJ));
822     PetscCall(DMDARestoreElements(dm, &nel, &nen, &e_loc));
823     PetscCall(MatDestroy(&P));
824 
825     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
826     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
827   }
828   PetscFunctionReturn(PETSC_SUCCESS);
829 }
830 
831 PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da, Mat J)
832 {
833   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p;
834   PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
835   MPI_Comm               comm;
836   PetscScalar           *values;
837   DMBoundaryType         bx, by;
838   ISLocalToGlobalMapping ltog;
839   DMDAStencilType        st;
840 
841   PetscFunctionBegin;
842   /*
843          nc - number of components per grid point
844          col - number of colors needed in one direction for single component problem
845   */
846   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
847   col = 2 * s + 1;
848   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
849   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
850   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
851 
852   PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
853   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
854 
855   PetscCall(MatSetBlockSize(J, nc));
856   /* determine the matrix preallocation information */
857   MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
858   for (i = xs; i < xs + nx; i++) {
859     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
860     pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
861 
862     for (j = ys; j < ys + ny; j++) {
863       slot = i - gxs + gnx * (j - gys);
864 
865       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
866       lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
867 
868       cnt = 0;
869       for (k = 0; k < nc; k++) {
870         for (l = lstart; l < lend + 1; l++) {
871           for (p = pstart; p < pend + 1; p++) {
872             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
873               cols[cnt++] = k + nc * (slot + gnx * l + p);
874             }
875           }
876         }
877         rows[k] = k + nc * (slot);
878       }
879       PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
880     }
881   }
882   PetscCall(MatSetBlockSize(J, nc));
883   PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
884   PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
885   MatPreallocateEnd(dnz, onz);
886 
887   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
888 
889   /*
890     For each node in the grid: we get the neighbors in the local (on processor ordering
891     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
892     PETSc ordering.
893   */
894   if (!da->prealloc_only) {
895     PetscCall(PetscCalloc1(col * col * nc * nc, &values));
896     for (i = xs; i < xs + nx; i++) {
897       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
898       pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
899 
900       for (j = ys; j < ys + ny; j++) {
901         slot = i - gxs + gnx * (j - gys);
902 
903         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
904         lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
905 
906         cnt = 0;
907         for (k = 0; k < nc; k++) {
908           for (l = lstart; l < lend + 1; l++) {
909             for (p = pstart; p < pend + 1; p++) {
910               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
911                 cols[cnt++] = k + nc * (slot + gnx * l + p);
912               }
913             }
914           }
915           rows[k] = k + nc * (slot);
916         }
917         PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
918       }
919     }
920     PetscCall(PetscFree(values));
921     /* do not copy values to GPU since they are all zero and not yet needed there */
922     PetscCall(MatBindToCPU(J, PETSC_TRUE));
923     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
924     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
925     PetscCall(MatBindToCPU(J, PETSC_FALSE));
926     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
927   }
928   PetscCall(PetscFree2(rows, cols));
929   PetscFunctionReturn(PETSC_SUCCESS);
930 }
931 
932 PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da, Mat J)
933 {
934   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
935   PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
936   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
937   MPI_Comm               comm;
938   PetscScalar           *values;
939   DMBoundaryType         bx, by, bz;
940   ISLocalToGlobalMapping ltog;
941   DMDAStencilType        st;
942 
943   PetscFunctionBegin;
944   /*
945          nc - number of components per grid point
946          col - number of colors needed in one direction for single component problem
947   */
948   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
949   col = 2 * s + 1;
950   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
951   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
952   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
953 
954   PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
955   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
956 
957   PetscCall(MatSetBlockSize(J, nc));
958   /* determine the matrix preallocation information */
959   MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
960   for (i = xs; i < xs + nx; i++) {
961     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
962     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
963     for (j = ys; j < ys + ny; j++) {
964       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
965       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
966       for (k = zs; k < zs + nz; k++) {
967         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
968         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
969 
970         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
971 
972         cnt = 0;
973         for (l = 0; l < nc; l++) {
974           for (ii = istart; ii < iend + 1; ii++) {
975             for (jj = jstart; jj < jend + 1; jj++) {
976               for (kk = kstart; kk < kend + 1; kk++) {
977                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
978                   cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
979                 }
980               }
981             }
982           }
983           rows[l] = l + nc * (slot);
984         }
985         PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
986       }
987     }
988   }
989   PetscCall(MatSetBlockSize(J, nc));
990   PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
991   PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
992   MatPreallocateEnd(dnz, onz);
993   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
994 
995   /*
996     For each node in the grid: we get the neighbors in the local (on processor ordering
997     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
998     PETSc ordering.
999   */
1000   if (!da->prealloc_only) {
1001     PetscCall(PetscCalloc1(col * col * col * nc * nc * nc, &values));
1002     for (i = xs; i < xs + nx; i++) {
1003       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1004       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1005       for (j = ys; j < ys + ny; j++) {
1006         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1007         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1008         for (k = zs; k < zs + nz; k++) {
1009           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1010           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1011 
1012           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1013 
1014           cnt = 0;
1015           for (l = 0; l < nc; l++) {
1016             for (ii = istart; ii < iend + 1; ii++) {
1017               for (jj = jstart; jj < jend + 1; jj++) {
1018                 for (kk = kstart; kk < kend + 1; kk++) {
1019                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1020                     cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
1021                   }
1022                 }
1023               }
1024             }
1025             rows[l] = l + nc * (slot);
1026           }
1027           PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
1028         }
1029       }
1030     }
1031     PetscCall(PetscFree(values));
1032     /* do not copy values to GPU since they are all zero and not yet needed there */
1033     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1034     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1035     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1036     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1037     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1038   }
1039   PetscCall(PetscFree2(rows, cols));
1040   PetscFunctionReturn(PETSC_SUCCESS);
1041 }
1042 
1043 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da, Mat J)
1044 {
1045   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, M, N;
1046   PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
1047   MPI_Comm               comm;
1048   DMBoundaryType         bx, by;
1049   ISLocalToGlobalMapping ltog, mltog;
1050   DMDAStencilType        st;
1051   PetscBool              removedups = PETSC_FALSE, alreadyboundtocpu = PETSC_TRUE;
1052 
1053   PetscFunctionBegin;
1054   /*
1055          nc - number of components per grid point
1056          col - number of colors needed in one direction for single component problem
1057   */
1058   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
1059   if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1060   col = 2 * s + 1;
1061   /*
1062        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1063        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1064   */
1065   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1066   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1067   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1068   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1069   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1070 
1071   PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
1072   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1073 
1074   PetscCall(MatSetBlockSize(J, nc));
1075   /* determine the matrix preallocation information */
1076   MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
1077   for (i = xs; i < xs + nx; i++) {
1078     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1079     pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1080 
1081     for (j = ys; j < ys + ny; j++) {
1082       slot = i - gxs + gnx * (j - gys);
1083 
1084       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1085       lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1086 
1087       cnt = 0;
1088       for (k = 0; k < nc; k++) {
1089         for (l = lstart; l < lend + 1; l++) {
1090           for (p = pstart; p < pend + 1; p++) {
1091             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
1092               cols[cnt++] = k + nc * (slot + gnx * l + p);
1093             }
1094           }
1095         }
1096         rows[k] = k + nc * (slot);
1097       }
1098       if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1099       else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1100     }
1101   }
1102   PetscCall(MatSetBlockSize(J, nc));
1103   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
1104   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
1105   MatPreallocateEnd(dnz, onz);
1106   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1107   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1108 
1109   /*
1110     For each node in the grid: we get the neighbors in the local (on processor ordering
1111     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1112     PETSc ordering.
1113   */
1114   if (!da->prealloc_only) {
1115     for (i = xs; i < xs + nx; i++) {
1116       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1117       pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1118 
1119       for (j = ys; j < ys + ny; j++) {
1120         slot = i - gxs + gnx * (j - gys);
1121 
1122         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1123         lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1124 
1125         cnt = 0;
1126         for (l = lstart; l < lend + 1; l++) {
1127           for (p = pstart; p < pend + 1; p++) {
1128             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
1129               cols[cnt++] = nc * (slot + gnx * l + p);
1130               for (k = 1; k < nc; k++) {
1131                 cols[cnt] = 1 + cols[cnt - 1];
1132                 cnt++;
1133               }
1134             }
1135           }
1136         }
1137         for (k = 0; k < nc; k++) rows[k] = k + nc * (slot);
1138         PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1139       }
1140     }
1141     /* do not copy values to GPU since they are all zero and not yet needed there */
1142     PetscCall(MatBoundToCPU(J, &alreadyboundtocpu));
1143     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1144     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1145     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1146     if (!alreadyboundtocpu) PetscCall(MatBindToCPU(J, PETSC_FALSE));
1147     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1148     if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1149   }
1150   PetscCall(PetscFree2(rows, cols));
1151   PetscFunctionReturn(PETSC_SUCCESS);
1152 }
1153 
1154 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da, Mat J)
1155 {
1156   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1157   PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, M, N;
1158   PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
1159   DM_DA                 *dd = (DM_DA *)da->data;
1160   PetscInt               ifill_col, *ofill = dd->ofill, *dfill = dd->dfill;
1161   MPI_Comm               comm;
1162   DMBoundaryType         bx, by;
1163   ISLocalToGlobalMapping ltog;
1164   DMDAStencilType        st;
1165   PetscBool              removedups = PETSC_FALSE;
1166 
1167   PetscFunctionBegin;
1168   /*
1169          nc - number of components per grid point
1170          col - number of colors needed in one direction for single component problem
1171   */
1172   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
1173   col = 2 * s + 1;
1174   /*
1175        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1176        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1177   */
1178   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1179   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1180   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1181   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1182   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1183 
1184   PetscCall(PetscMalloc1(col * col * nc, &cols));
1185   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1186 
1187   PetscCall(MatSetBlockSize(J, nc));
1188   /* determine the matrix preallocation information */
1189   MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
1190   for (i = xs; i < xs + nx; i++) {
1191     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1192     pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1193 
1194     for (j = ys; j < ys + ny; j++) {
1195       slot = i - gxs + gnx * (j - gys);
1196 
1197       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1198       lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1199 
1200       for (k = 0; k < nc; k++) {
1201         cnt = 0;
1202         for (l = lstart; l < lend + 1; l++) {
1203           for (p = pstart; p < pend + 1; p++) {
1204             if (l || p) {
1205               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
1206                 for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
1207               }
1208             } else {
1209               if (dfill) {
1210                 for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
1211               } else {
1212                 for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
1213               }
1214             }
1215           }
1216         }
1217         row    = k + nc * (slot);
1218         maxcnt = PetscMax(maxcnt, cnt);
1219         if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
1220         else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
1221       }
1222     }
1223   }
1224   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
1225   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
1226   MatPreallocateEnd(dnz, onz);
1227   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1228 
1229   /*
1230     For each node in the grid: we get the neighbors in the local (on processor ordering
1231     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1232     PETSc ordering.
1233   */
1234   if (!da->prealloc_only) {
1235     for (i = xs; i < xs + nx; i++) {
1236       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1237       pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1238 
1239       for (j = ys; j < ys + ny; j++) {
1240         slot = i - gxs + gnx * (j - gys);
1241 
1242         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1243         lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1244 
1245         for (k = 0; k < nc; k++) {
1246           cnt = 0;
1247           for (l = lstart; l < lend + 1; l++) {
1248             for (p = pstart; p < pend + 1; p++) {
1249               if (l || p) {
1250                 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
1251                   for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
1252                 }
1253               } else {
1254                 if (dfill) {
1255                   for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
1256                 } else {
1257                   for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
1258                 }
1259               }
1260             }
1261           }
1262           row = k + nc * (slot);
1263           PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1264         }
1265       }
1266     }
1267     /* do not copy values to GPU since they are all zero and not yet needed there */
1268     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1269     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1270     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1271     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1272     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1273   }
1274   PetscCall(PetscFree(cols));
1275   PetscFunctionReturn(PETSC_SUCCESS);
1276 }
1277 
1278 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da, Mat J)
1279 {
1280   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1281   PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
1282   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
1283   MPI_Comm               comm;
1284   DMBoundaryType         bx, by, bz;
1285   ISLocalToGlobalMapping ltog, mltog;
1286   DMDAStencilType        st;
1287   PetscBool              removedups = PETSC_FALSE;
1288 
1289   PetscFunctionBegin;
1290   /*
1291          nc - number of components per grid point
1292          col - number of colors needed in one direction for single component problem
1293   */
1294   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
1295   if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1296   col = 2 * s + 1;
1297 
1298   /*
1299        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1300        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1301   */
1302   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1303   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1304   if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;
1305 
1306   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
1307   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
1308   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1309 
1310   PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
1311   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1312 
1313   PetscCall(MatSetBlockSize(J, nc));
1314   /* determine the matrix preallocation information */
1315   MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
1316   for (i = xs; i < xs + nx; i++) {
1317     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1318     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1319     for (j = ys; j < ys + ny; j++) {
1320       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1321       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1322       for (k = zs; k < zs + nz; k++) {
1323         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1324         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1325 
1326         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1327 
1328         cnt = 0;
1329         for (l = 0; l < nc; l++) {
1330           for (ii = istart; ii < iend + 1; ii++) {
1331             for (jj = jstart; jj < jend + 1; jj++) {
1332               for (kk = kstart; kk < kend + 1; kk++) {
1333                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1334                   cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
1335                 }
1336               }
1337             }
1338           }
1339           rows[l] = l + nc * (slot);
1340         }
1341         if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1342         else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1343       }
1344     }
1345   }
1346   PetscCall(MatSetBlockSize(J, nc));
1347   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
1348   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
1349   MatPreallocateEnd(dnz, onz);
1350   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1351   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1352 
1353   /*
1354     For each node in the grid: we get the neighbors in the local (on processor ordering
1355     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1356     PETSc ordering.
1357   */
1358   if (!da->prealloc_only) {
1359     for (i = xs; i < xs + nx; i++) {
1360       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1361       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1362       for (j = ys; j < ys + ny; j++) {
1363         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1364         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1365         for (k = zs; k < zs + nz; k++) {
1366           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1367           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1368 
1369           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1370 
1371           cnt = 0;
1372           for (kk = kstart; kk < kend + 1; kk++) {
1373             for (jj = jstart; jj < jend + 1; jj++) {
1374               for (ii = istart; ii < iend + 1; ii++) {
1375                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1376                   cols[cnt++] = nc * (slot + ii + gnx * jj + gnx * gny * kk);
1377                   for (l = 1; l < nc; l++) {
1378                     cols[cnt] = 1 + cols[cnt - 1];
1379                     cnt++;
1380                   }
1381                 }
1382               }
1383             }
1384           }
1385           rows[0] = nc * (slot);
1386           for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1387           PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1388         }
1389       }
1390     }
1391     /* do not copy values to GPU since they are all zero and not yet needed there */
1392     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1393     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1394     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1395     if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1396     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1397     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1398   }
1399   PetscCall(PetscFree2(rows, cols));
1400   PetscFunctionReturn(PETSC_SUCCESS);
1401 }
1402 
1403 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da, Mat J)
1404 {
1405   DM_DA                 *dd = (DM_DA *)da->data;
1406   PetscInt               xs, nx, i, j, gxs, gnx, row, k, l;
1407   PetscInt               m, dim, s, *cols = NULL, nc, cnt, maxcnt = 0, *ocols;
1408   PetscInt              *ofill = dd->ofill, *dfill = dd->dfill;
1409   DMBoundaryType         bx;
1410   ISLocalToGlobalMapping ltog;
1411   PetscMPIInt            rank, size;
1412 
1413   PetscFunctionBegin;
1414   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)da), &rank));
1415   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));
1416 
1417   /*
1418          nc - number of components per grid point
1419   */
1420   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1421   PetscCheck(s <= 1, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Matrix creation for 1d not implemented correctly for stencil width larger than 1");
1422   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1423   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1424 
1425   PetscCall(MatSetBlockSize(J, nc));
1426   PetscCall(PetscCalloc2(nx * nc, &cols, nx * nc, &ocols));
1427 
1428   /*
1429         note should be smaller for first and last process with no periodic
1430         does not handle dfill
1431   */
1432   cnt = 0;
1433   /* coupling with process to the left */
1434   for (i = 0; i < s; i++) {
1435     for (j = 0; j < nc; j++) {
1436       ocols[cnt] = ((rank == 0) ? 0 : (s - i) * (ofill[j + 1] - ofill[j]));
1437       cols[cnt]  = dfill[j + 1] - dfill[j] + (s + i) * (ofill[j + 1] - ofill[j]);
1438       if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1439         if (size > 1) ocols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
1440         else cols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
1441       }
1442       maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1443       cnt++;
1444     }
1445   }
1446   for (i = s; i < nx - s; i++) {
1447     for (j = 0; j < nc; j++) {
1448       cols[cnt] = dfill[j + 1] - dfill[j] + 2 * s * (ofill[j + 1] - ofill[j]);
1449       maxcnt    = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1450       cnt++;
1451     }
1452   }
1453   /* coupling with process to the right */
1454   for (i = nx - s; i < nx; i++) {
1455     for (j = 0; j < nc; j++) {
1456       ocols[cnt] = ((rank == (size - 1)) ? 0 : (i - nx + s + 1) * (ofill[j + 1] - ofill[j]));
1457       cols[cnt]  = dfill[j + 1] - dfill[j] + (s + nx - i - 1) * (ofill[j + 1] - ofill[j]);
1458       if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1459         if (size > 1) ocols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
1460         else cols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
1461       }
1462       maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1463       cnt++;
1464     }
1465   }
1466 
1467   PetscCall(MatSeqAIJSetPreallocation(J, 0, cols));
1468   PetscCall(MatMPIAIJSetPreallocation(J, 0, cols, 0, ocols));
1469   PetscCall(PetscFree2(cols, ocols));
1470 
1471   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1472   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1473 
1474   /*
1475     For each node in the grid: we get the neighbors in the local (on processor ordering
1476     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1477     PETSc ordering.
1478   */
1479   if (!da->prealloc_only) {
1480     PetscCall(PetscMalloc1(maxcnt, &cols));
1481     row = xs * nc;
1482     /* coupling with process to the left */
1483     for (i = xs; i < xs + s; i++) {
1484       for (j = 0; j < nc; j++) {
1485         cnt = 0;
1486         if (rank) {
1487           for (l = 0; l < s; l++) {
1488             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1489           }
1490         }
1491         if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1492           for (l = 0; l < s; l++) {
1493             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (m + i - s - l) * nc + ofill[k];
1494           }
1495         }
1496         if (dfill) {
1497           for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1498         } else {
1499           for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1500         }
1501         for (l = 0; l < s; l++) {
1502           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1503         }
1504         PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1505         row++;
1506       }
1507     }
1508     for (i = xs + s; i < xs + nx - s; i++) {
1509       for (j = 0; j < nc; j++) {
1510         cnt = 0;
1511         for (l = 0; l < s; l++) {
1512           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1513         }
1514         if (dfill) {
1515           for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1516         } else {
1517           for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1518         }
1519         for (l = 0; l < s; l++) {
1520           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1521         }
1522         PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1523         row++;
1524       }
1525     }
1526     /* coupling with process to the right */
1527     for (i = xs + nx - s; i < xs + nx; i++) {
1528       for (j = 0; j < nc; j++) {
1529         cnt = 0;
1530         for (l = 0; l < s; l++) {
1531           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1532         }
1533         if (dfill) {
1534           for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1535         } else {
1536           for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1537         }
1538         if (rank < size - 1) {
1539           for (l = 0; l < s; l++) {
1540             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1541           }
1542         }
1543         if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1544           for (l = 0; l < s; l++) {
1545             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s - l - m + 2) * nc + ofill[k];
1546           }
1547         }
1548         PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1549         row++;
1550       }
1551     }
1552     PetscCall(PetscFree(cols));
1553     /* do not copy values to GPU since they are all zero and not yet needed there */
1554     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1555     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1556     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1557     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1558     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1559   }
1560   PetscFunctionReturn(PETSC_SUCCESS);
1561 }
1562 
1563 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da, Mat J)
1564 {
1565   PetscInt               xs, nx, i, i1, slot, gxs, gnx;
1566   PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1567   PetscInt               istart, iend;
1568   DMBoundaryType         bx;
1569   ISLocalToGlobalMapping ltog, mltog;
1570 
1571   PetscFunctionBegin;
1572   /*
1573          nc - number of components per grid point
1574          col - number of colors needed in one direction for single component problem
1575   */
1576   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1577   if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1578   col = 2 * s + 1;
1579 
1580   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1581   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1582 
1583   PetscCall(MatSetBlockSize(J, nc));
1584   PetscCall(MatSeqAIJSetPreallocation(J, col * nc, NULL));
1585   PetscCall(MatMPIAIJSetPreallocation(J, col * nc, NULL, col * nc, NULL));
1586 
1587   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1588   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1589   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1590 
1591   /*
1592     For each node in the grid: we get the neighbors in the local (on processor ordering
1593     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1594     PETSc ordering.
1595   */
1596   if (!da->prealloc_only) {
1597     PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
1598     for (i = xs; i < xs + nx; i++) {
1599       istart = PetscMax(-s, gxs - i);
1600       iend   = PetscMin(s, gxs + gnx - i - 1);
1601       slot   = i - gxs;
1602 
1603       cnt = 0;
1604       for (i1 = istart; i1 < iend + 1; i1++) {
1605         cols[cnt++] = nc * (slot + i1);
1606         for (l = 1; l < nc; l++) {
1607           cols[cnt] = 1 + cols[cnt - 1];
1608           cnt++;
1609         }
1610       }
1611       rows[0] = nc * (slot);
1612       for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1613       PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1614     }
1615     /* do not copy values to GPU since they are all zero and not yet needed there */
1616     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1617     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1618     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1619     if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1620     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1621     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1622     PetscCall(PetscFree2(rows, cols));
1623   }
1624   PetscFunctionReturn(PETSC_SUCCESS);
1625 }
1626 
1627 PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da, Mat J)
1628 {
1629   PetscInt               xs, nx, i, i1, slot, gxs, gnx;
1630   PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1631   PetscInt               istart, iend;
1632   DMBoundaryType         bx;
1633   ISLocalToGlobalMapping ltog, mltog;
1634 
1635   PetscFunctionBegin;
1636   /*
1637          nc - number of components per grid point
1638          col - number of colors needed in one direction for single component problem
1639   */
1640   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1641   col = 2 * s + 1;
1642 
1643   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1644   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1645 
1646   PetscCall(MatSetBlockSize(J, nc));
1647   PetscCall(MatSeqAIJSetTotalPreallocation(J, nx * nc * col * nc));
1648 
1649   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1650   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1651   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1652 
1653   /*
1654     For each node in the grid: we get the neighbors in the local (on processor ordering
1655     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1656     PETSc ordering.
1657   */
1658   if (!da->prealloc_only) {
1659     PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
1660     for (i = xs; i < xs + nx; i++) {
1661       istart = PetscMax(-s, gxs - i);
1662       iend   = PetscMin(s, gxs + gnx - i - 1);
1663       slot   = i - gxs;
1664 
1665       cnt = 0;
1666       for (i1 = istart; i1 < iend + 1; i1++) {
1667         cols[cnt++] = nc * (slot + i1);
1668         for (l = 1; l < nc; l++) {
1669           cols[cnt] = 1 + cols[cnt - 1];
1670           cnt++;
1671         }
1672       }
1673       rows[0] = nc * (slot);
1674       for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1675       PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1676     }
1677     /* do not copy values to GPU since they are all zero and not yet needed there */
1678     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1679     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1680     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1681     if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1682     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1683     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1684     PetscCall(PetscFree2(rows, cols));
1685   }
1686   PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1687   PetscFunctionReturn(PETSC_SUCCESS);
1688 }
1689 
1690 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da, Mat J)
1691 {
1692   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1693   PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
1694   PetscInt               istart, iend, jstart, jend, ii, jj;
1695   MPI_Comm               comm;
1696   PetscScalar           *values;
1697   DMBoundaryType         bx, by;
1698   DMDAStencilType        st;
1699   ISLocalToGlobalMapping ltog;
1700 
1701   PetscFunctionBegin;
1702   /*
1703      nc - number of components per grid point
1704      col - number of colors needed in one direction for single component problem
1705   */
1706   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
1707   col = 2 * s + 1;
1708 
1709   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1710   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1711   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1712 
1713   PetscCall(PetscMalloc1(col * col * nc * nc, &cols));
1714 
1715   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1716 
1717   /* determine the matrix preallocation information */
1718   MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
1719   for (i = xs; i < xs + nx; i++) {
1720     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1721     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1722     for (j = ys; j < ys + ny; j++) {
1723       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1724       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1725       slot   = i - gxs + gnx * (j - gys);
1726 
1727       /* Find block columns in block row */
1728       cnt = 0;
1729       for (ii = istart; ii < iend + 1; ii++) {
1730         for (jj = jstart; jj < jend + 1; jj++) {
1731           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1732             cols[cnt++] = slot + ii + gnx * jj;
1733           }
1734         }
1735       }
1736       PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
1737     }
1738   }
1739   PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
1740   PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1741   MatPreallocateEnd(dnz, onz);
1742 
1743   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1744 
1745   /*
1746     For each node in the grid: we get the neighbors in the local (on processor ordering
1747     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1748     PETSc ordering.
1749   */
1750   if (!da->prealloc_only) {
1751     PetscCall(PetscCalloc1(col * col * nc * nc, &values));
1752     for (i = xs; i < xs + nx; i++) {
1753       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1754       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1755       for (j = ys; j < ys + ny; j++) {
1756         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1757         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1758         slot   = i - gxs + gnx * (j - gys);
1759         cnt    = 0;
1760         for (ii = istart; ii < iend + 1; ii++) {
1761           for (jj = jstart; jj < jend + 1; jj++) {
1762             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1763               cols[cnt++] = slot + ii + gnx * jj;
1764             }
1765           }
1766         }
1767         PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
1768       }
1769     }
1770     PetscCall(PetscFree(values));
1771     /* do not copy values to GPU since they are all zero and not yet needed there */
1772     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1773     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1774     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1775     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1776     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1777   }
1778   PetscCall(PetscFree(cols));
1779   PetscFunctionReturn(PETSC_SUCCESS);
1780 }
1781 
1782 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da, Mat J)
1783 {
1784   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1785   PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
1786   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
1787   MPI_Comm               comm;
1788   PetscScalar           *values;
1789   DMBoundaryType         bx, by, bz;
1790   DMDAStencilType        st;
1791   ISLocalToGlobalMapping ltog;
1792 
1793   PetscFunctionBegin;
1794   /*
1795          nc - number of components per grid point
1796          col - number of colors needed in one direction for single component problem
1797   */
1798   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
1799   col = 2 * s + 1;
1800 
1801   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
1802   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
1803   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1804 
1805   PetscCall(PetscMalloc1(col * col * col, &cols));
1806 
1807   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1808 
1809   /* determine the matrix preallocation information */
1810   MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
1811   for (i = xs; i < xs + nx; i++) {
1812     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1813     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1814     for (j = ys; j < ys + ny; j++) {
1815       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1816       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1817       for (k = zs; k < zs + nz; k++) {
1818         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1819         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1820 
1821         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1822 
1823         /* Find block columns in block row */
1824         cnt = 0;
1825         for (ii = istart; ii < iend + 1; ii++) {
1826           for (jj = jstart; jj < jend + 1; jj++) {
1827             for (kk = kstart; kk < kend + 1; kk++) {
1828               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1829                 cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
1830               }
1831             }
1832           }
1833         }
1834         PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
1835       }
1836     }
1837   }
1838   PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
1839   PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1840   MatPreallocateEnd(dnz, onz);
1841 
1842   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1843 
1844   /*
1845     For each node in the grid: we get the neighbors in the local (on processor ordering
1846     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1847     PETSc ordering.
1848   */
1849   if (!da->prealloc_only) {
1850     PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
1851     for (i = xs; i < xs + nx; i++) {
1852       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1853       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1854       for (j = ys; j < ys + ny; j++) {
1855         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1856         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1857         for (k = zs; k < zs + nz; k++) {
1858           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1859           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1860 
1861           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1862 
1863           cnt = 0;
1864           for (ii = istart; ii < iend + 1; ii++) {
1865             for (jj = jstart; jj < jend + 1; jj++) {
1866               for (kk = kstart; kk < kend + 1; kk++) {
1867                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1868                   cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
1869                 }
1870               }
1871             }
1872           }
1873           PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
1874         }
1875       }
1876     }
1877     PetscCall(PetscFree(values));
1878     /* do not copy values to GPU since they are all zero and not yet needed there */
1879     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1880     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1881     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1882     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1883     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1884   }
1885   PetscCall(PetscFree(cols));
1886   PetscFunctionReturn(PETSC_SUCCESS);
1887 }
1888 
1889 /*
1890   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
1891   identify in the local ordering with periodic domain.
1892 */
1893 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog, PetscInt *row, PetscInt *cnt, PetscInt col[])
1894 {
1895   PetscInt i, n;
1896 
1897   PetscFunctionBegin;
1898   PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, 1, row, row));
1899   PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, *cnt, col, col));
1900   for (i = 0, n = 0; i < *cnt; i++) {
1901     if (col[i] >= *row) col[n++] = col[i];
1902   }
1903   *cnt = n;
1904   PetscFunctionReturn(PETSC_SUCCESS);
1905 }
1906 
1907 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da, Mat J)
1908 {
1909   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1910   PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
1911   PetscInt               istart, iend, jstart, jend, ii, jj;
1912   MPI_Comm               comm;
1913   PetscScalar           *values;
1914   DMBoundaryType         bx, by;
1915   DMDAStencilType        st;
1916   ISLocalToGlobalMapping ltog;
1917 
1918   PetscFunctionBegin;
1919   /*
1920      nc - number of components per grid point
1921      col - number of colors needed in one direction for single component problem
1922   */
1923   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
1924   col = 2 * s + 1;
1925 
1926   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1927   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1928   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1929 
1930   PetscCall(PetscMalloc1(col * col * nc * nc, &cols));
1931 
1932   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1933 
1934   /* determine the matrix preallocation information */
1935   MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
1936   for (i = xs; i < xs + nx; i++) {
1937     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1938     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1939     for (j = ys; j < ys + ny; j++) {
1940       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1941       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1942       slot   = i - gxs + gnx * (j - gys);
1943 
1944       /* Find block columns in block row */
1945       cnt = 0;
1946       for (ii = istart; ii < iend + 1; ii++) {
1947         for (jj = jstart; jj < jend + 1; jj++) {
1948           if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
1949         }
1950       }
1951       PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
1952       PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
1953     }
1954   }
1955   PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
1956   PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1957   MatPreallocateEnd(dnz, onz);
1958 
1959   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1960 
1961   /*
1962     For each node in the grid: we get the neighbors in the local (on processor ordering
1963     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1964     PETSc ordering.
1965   */
1966   if (!da->prealloc_only) {
1967     PetscCall(PetscCalloc1(col * col * nc * nc, &values));
1968     for (i = xs; i < xs + nx; i++) {
1969       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1970       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1971       for (j = ys; j < ys + ny; j++) {
1972         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1973         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1974         slot   = i - gxs + gnx * (j - gys);
1975 
1976         /* Find block columns in block row */
1977         cnt = 0;
1978         for (ii = istart; ii < iend + 1; ii++) {
1979           for (jj = jstart; jj < jend + 1; jj++) {
1980             if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
1981           }
1982         }
1983         PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
1984         PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
1985       }
1986     }
1987     PetscCall(PetscFree(values));
1988     /* do not copy values to GPU since they are all zero and not yet needed there */
1989     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1990     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1991     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1992     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1993     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1994   }
1995   PetscCall(PetscFree(cols));
1996   PetscFunctionReturn(PETSC_SUCCESS);
1997 }
1998 
1999 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da, Mat J)
2000 {
2001   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
2002   PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
2003   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
2004   MPI_Comm               comm;
2005   PetscScalar           *values;
2006   DMBoundaryType         bx, by, bz;
2007   DMDAStencilType        st;
2008   ISLocalToGlobalMapping ltog;
2009 
2010   PetscFunctionBegin;
2011   /*
2012      nc - number of components per grid point
2013      col - number of colors needed in one direction for single component problem
2014   */
2015   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
2016   col = 2 * s + 1;
2017 
2018   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
2019   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
2020   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
2021 
2022   /* create the matrix */
2023   PetscCall(PetscMalloc1(col * col * col, &cols));
2024 
2025   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
2026 
2027   /* determine the matrix preallocation information */
2028   MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
2029   for (i = xs; i < xs + nx; i++) {
2030     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2031     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2032     for (j = ys; j < ys + ny; j++) {
2033       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2034       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2035       for (k = zs; k < zs + nz; k++) {
2036         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2037         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2038 
2039         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2040 
2041         /* Find block columns in block row */
2042         cnt = 0;
2043         for (ii = istart; ii < iend + 1; ii++) {
2044           for (jj = jstart; jj < jend + 1; jj++) {
2045             for (kk = kstart; kk < kend + 1; kk++) {
2046               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
2047             }
2048           }
2049         }
2050         PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
2051         PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
2052       }
2053     }
2054   }
2055   PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
2056   PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
2057   MatPreallocateEnd(dnz, onz);
2058 
2059   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
2060 
2061   /*
2062     For each node in the grid: we get the neighbors in the local (on processor ordering
2063     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2064     PETSc ordering.
2065   */
2066   if (!da->prealloc_only) {
2067     PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
2068     for (i = xs; i < xs + nx; i++) {
2069       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2070       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2071       for (j = ys; j < ys + ny; j++) {
2072         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2073         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2074         for (k = zs; k < zs + nz; k++) {
2075           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2076           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2077 
2078           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2079 
2080           cnt = 0;
2081           for (ii = istart; ii < iend + 1; ii++) {
2082             for (jj = jstart; jj < jend + 1; jj++) {
2083               for (kk = kstart; kk < kend + 1; kk++) {
2084                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
2085               }
2086             }
2087           }
2088           PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
2089           PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
2090         }
2091       }
2092     }
2093     PetscCall(PetscFree(values));
2094     /* do not copy values to GPU since they are all zero and not yet needed there */
2095     PetscCall(MatBindToCPU(J, PETSC_TRUE));
2096     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
2097     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
2098     PetscCall(MatBindToCPU(J, PETSC_FALSE));
2099     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2100   }
2101   PetscCall(PetscFree(cols));
2102   PetscFunctionReturn(PETSC_SUCCESS);
2103 }
2104 
2105 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da, Mat J)
2106 {
2107   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
2108   PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, *dnz, *onz;
2109   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
2110   DM_DA                 *dd = (DM_DA *)da->data;
2111   PetscInt               ifill_col, *dfill = dd->dfill, *ofill = dd->ofill;
2112   MPI_Comm               comm;
2113   PetscScalar           *values;
2114   DMBoundaryType         bx, by, bz;
2115   ISLocalToGlobalMapping ltog;
2116   DMDAStencilType        st;
2117   PetscBool              removedups = PETSC_FALSE;
2118 
2119   PetscFunctionBegin;
2120   /*
2121          nc - number of components per grid point
2122          col - number of colors needed in one direction for single component problem
2123   */
2124   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
2125   col = 2 * s + 1;
2126 
2127   /*
2128        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
2129        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
2130   */
2131   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
2132   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
2133   if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;
2134 
2135   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
2136   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
2137   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
2138 
2139   PetscCall(PetscMalloc1(col * col * col * nc, &cols));
2140   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
2141 
2142   /* determine the matrix preallocation information */
2143   MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
2144 
2145   PetscCall(MatSetBlockSize(J, nc));
2146   for (i = xs; i < xs + nx; i++) {
2147     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2148     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2149     for (j = ys; j < ys + ny; j++) {
2150       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2151       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2152       for (k = zs; k < zs + nz; k++) {
2153         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2154         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2155 
2156         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2157 
2158         for (l = 0; l < nc; l++) {
2159           cnt = 0;
2160           for (ii = istart; ii < iend + 1; ii++) {
2161             for (jj = jstart; jj < jend + 1; jj++) {
2162               for (kk = kstart; kk < kend + 1; kk++) {
2163                 if (ii || jj || kk) {
2164                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
2165                     for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2166                   }
2167                 } else {
2168                   if (dfill) {
2169                     for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2170                   } else {
2171                     for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2172                   }
2173                 }
2174               }
2175             }
2176           }
2177           row    = l + nc * (slot);
2178           maxcnt = PetscMax(maxcnt, cnt);
2179           if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
2180           else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
2181         }
2182       }
2183     }
2184   }
2185   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
2186   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
2187   MatPreallocateEnd(dnz, onz);
2188   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
2189 
2190   /*
2191     For each node in the grid: we get the neighbors in the local (on processor ordering
2192     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2193     PETSc ordering.
2194   */
2195   if (!da->prealloc_only) {
2196     PetscCall(PetscCalloc1(maxcnt, &values));
2197     for (i = xs; i < xs + nx; i++) {
2198       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2199       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2200       for (j = ys; j < ys + ny; j++) {
2201         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2202         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2203         for (k = zs; k < zs + nz; k++) {
2204           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2205           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2206 
2207           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2208 
2209           for (l = 0; l < nc; l++) {
2210             cnt = 0;
2211             for (ii = istart; ii < iend + 1; ii++) {
2212               for (jj = jstart; jj < jend + 1; jj++) {
2213                 for (kk = kstart; kk < kend + 1; kk++) {
2214                   if (ii || jj || kk) {
2215                     if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
2216                       for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2217                     }
2218                   } else {
2219                     if (dfill) {
2220                       for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2221                     } else {
2222                       for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2223                     }
2224                   }
2225                 }
2226               }
2227             }
2228             row = l + nc * (slot);
2229             PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, values, INSERT_VALUES));
2230           }
2231         }
2232       }
2233     }
2234     PetscCall(PetscFree(values));
2235     /* do not copy values to GPU since they are all zero and not yet needed there */
2236     PetscCall(MatBindToCPU(J, PETSC_TRUE));
2237     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
2238     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
2239     PetscCall(MatBindToCPU(J, PETSC_FALSE));
2240     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2241   }
2242   PetscCall(PetscFree(cols));
2243   PetscFunctionReturn(PETSC_SUCCESS);
2244 }
2245