xref: /petsc/src/dm/impls/da/fdda.c (revision 4ad8454beace47809662cdae21ee081016eaa39a)
1 #include <petsc/private/dmdaimpl.h> /*I      "petscdmda.h"     I*/
2 #include <petscmat.h>
3 #include <petscbt.h>
4 
5 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM, ISColoringType, ISColoring *);
6 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM, ISColoringType, ISColoring *);
7 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM, ISColoringType, ISColoring *);
8 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM, ISColoringType, ISColoring *);
9 
10 /*
11    For ghost i that may be negative or greater than the upper bound this
12   maps it into the 0:m-1 range using periodicity
13 */
14 #define SetInRange(i, m) ((i < 0) ? m + i : ((i >= m) ? i - m : i))
15 
16 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill, PetscInt w, PetscInt **rfill)
17 {
18   PetscInt i, j, nz, *fill;
19 
20   PetscFunctionBegin;
21   if (!dfill) PetscFunctionReturn(PETSC_SUCCESS);
22 
23   /* count number nonzeros */
24   nz = 0;
25   for (i = 0; i < w; i++) {
26     for (j = 0; j < w; j++) {
27       if (dfill[w * i + j]) nz++;
28     }
29   }
30   PetscCall(PetscMalloc1(nz + w + 1, &fill));
31   /* construct modified CSR storage of nonzero structure */
32   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
33    so fill[1] - fill[0] gives number of nonzeros in first row etc */
34   nz = w + 1;
35   for (i = 0; i < w; i++) {
36     fill[i] = nz;
37     for (j = 0; j < w; j++) {
38       if (dfill[w * i + j]) {
39         fill[nz] = j;
40         nz++;
41       }
42     }
43   }
44   fill[w] = nz;
45 
46   *rfill = fill;
47   PetscFunctionReturn(PETSC_SUCCESS);
48 }
49 
50 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse, PetscInt w, PetscInt **rfill)
51 {
52   PetscInt nz;
53 
54   PetscFunctionBegin;
55   if (!dfillsparse) PetscFunctionReturn(PETSC_SUCCESS);
56 
57   /* Determine number of non-zeros */
58   nz = (dfillsparse[w] - w - 1);
59 
60   /* Allocate space for our copy of the given sparse matrix representation. */
61   PetscCall(PetscMalloc1(nz + w + 1, rfill));
62   PetscCall(PetscArraycpy(*rfill, dfillsparse, nz + w + 1));
63   PetscFunctionReturn(PETSC_SUCCESS);
64 }
65 
66 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
67 {
68   PetscInt i, k, cnt = 1;
69 
70   PetscFunctionBegin;
71   /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of
72    columns to the left with any nonzeros in them plus 1 */
73   PetscCall(PetscCalloc1(dd->w, &dd->ofillcols));
74   for (i = 0; i < dd->w; i++) {
75     for (k = dd->ofill[i]; k < dd->ofill[i + 1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
76   }
77   for (i = 0; i < dd->w; i++) {
78     if (dd->ofillcols[i]) dd->ofillcols[i] = cnt++;
79   }
80   PetscFunctionReturn(PETSC_SUCCESS);
81 }
82 
83 /*@
84   DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
85   of the matrix returned by `DMCreateMatrix()`.
86 
87   Logically Collective
88 
89   Input Parameters:
90 + da    - the distributed array
91 . dfill - the fill pattern in the diagonal block (may be `NULL`, means use dense block)
92 - ofill - the fill pattern in the off-diagonal blocks
93 
94   Level: developer
95 
96   Notes:
97   This only makes sense when you are doing multicomponent problems but using the
98   `MATMPIAIJ` matrix format
99 
100   The format for `dfill` and `ofill` is a 2 dimensional dof by dof matrix with 1 entries
101   representing coupling and 0 entries for missing coupling. For example
102 .vb
103             dfill[9] = {1, 0, 0,
104                         1, 1, 0,
105                         0, 1, 1}
106 .ve
107   means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
108   itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
109   diagonal block).
110 
111   `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns then
112   can be represented in the `dfill`, `ofill` format
113 
114   Contributed by\:
115   Glenn Hammond
116 
117 .seealso: [](sec_struct), `DM`, `DMDA`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`, `DMDASetBlockFillsSparse()`
118 @*/
119 PetscErrorCode DMDASetBlockFills(DM da, const PetscInt *dfill, const PetscInt *ofill)
120 {
121   DM_DA *dd = (DM_DA *)da->data;
122 
123   PetscFunctionBegin;
124   /* save the given dfill and ofill information */
125   PetscCall(DMDASetBlockFills_Private(dfill, dd->w, &dd->dfill));
126   PetscCall(DMDASetBlockFills_Private(ofill, dd->w, &dd->ofill));
127 
128   /* count nonzeros in ofill columns */
129   PetscCall(DMDASetBlockFills_Private2(dd));
130   PetscFunctionReturn(PETSC_SUCCESS);
131 }
132 
133 /*@
134   DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
135   of the matrix returned by `DMCreateMatrix()`, using sparse representations
136   of fill patterns.
137 
138   Logically Collective
139 
140   Input Parameters:
141 + da          - the distributed array
142 . dfillsparse - the sparse fill pattern in the diagonal block (may be `NULL`, means use dense block)
143 - ofillsparse - the sparse fill pattern in the off-diagonal blocks
144 
145   Level: developer
146 
147   Notes:
148   This only makes sense when you are doing multicomponent problems but using the
149   `MATMPIAIJ` matrix format
150 
151   The format for `dfill` and `ofill` is a sparse representation of a
152   dof-by-dof matrix with 1 entries representing coupling and 0 entries
153   for missing coupling.  The sparse representation is a 1 dimensional
154   array of length nz + dof + 1, where nz is the number of non-zeros in
155   the matrix.  The first dof entries in the array give the
156   starting array indices of each row's items in the rest of the array,
157   the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
158   and the remaining nz items give the column indices of each of
159   the 1s within the logical 2D matrix.  Each row's items within
160   the array are the column indices of the 1s within that row
161   of the 2D matrix.  PETSc developers may recognize that this is the
162   same format as that computed by the `DMDASetBlockFills_Private()`
163   function from a dense 2D matrix representation.
164 
165   `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns then
166   can be represented in the `dfill`, `ofill` format
167 
168   Contributed by\:
169   Philip C. Roth
170 
171 .seealso: [](sec_struct), `DM`, `DMDA`, `DMDASetBlockFills()`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
172 @*/
173 PetscErrorCode DMDASetBlockFillsSparse(DM da, const PetscInt *dfillsparse, const PetscInt *ofillsparse)
174 {
175   DM_DA *dd = (DM_DA *)da->data;
176 
177   PetscFunctionBegin;
178   /* save the given dfill and ofill information */
179   PetscCall(DMDASetBlockFillsSparse_Private(dfillsparse, dd->w, &dd->dfill));
180   PetscCall(DMDASetBlockFillsSparse_Private(ofillsparse, dd->w, &dd->ofill));
181 
182   /* count nonzeros in ofill columns */
183   PetscCall(DMDASetBlockFills_Private2(dd));
184   PetscFunctionReturn(PETSC_SUCCESS);
185 }
186 
187 PetscErrorCode DMCreateColoring_DA(DM da, ISColoringType ctype, ISColoring *coloring)
188 {
189   PetscInt       dim, m, n, p, nc;
190   DMBoundaryType bx, by, bz;
191   MPI_Comm       comm;
192   PetscMPIInt    size;
193   PetscBool      isBAIJ;
194   DM_DA         *dd = (DM_DA *)da->data;
195 
196   PetscFunctionBegin;
197   /*
198                                   m
199           ------------------------------------------------------
200          |                                                     |
201          |                                                     |
202          |               ----------------------                |
203          |               |                    |                |
204       n  |           yn  |                    |                |
205          |               |                    |                |
206          |               .---------------------                |
207          |             (xs,ys)     xn                          |
208          |            .                                        |
209          |         (gxs,gys)                                   |
210          |                                                     |
211           -----------------------------------------------------
212   */
213 
214   /*
215          nc - number of components per grid point
216          col - number of colors needed in one direction for single component problem
217 
218   */
219   PetscCall(DMDAGetInfo(da, &dim, NULL, NULL, NULL, &m, &n, &p, &nc, NULL, &bx, &by, &bz, NULL));
220 
221   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
222   PetscCallMPI(MPI_Comm_size(comm, &size));
223   if (ctype == IS_COLORING_LOCAL) {
224     if (size == 1) {
225       ctype = IS_COLORING_GLOBAL;
226     } else {
227       PetscCheck((dim == 1) || !((m == 1 && bx == DM_BOUNDARY_PERIODIC) || (n == 1 && by == DM_BOUNDARY_PERIODIC) || (p == 1 && bz == DM_BOUNDARY_PERIODIC)), PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain on the same process");
228     }
229   }
230 
231   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
232      matrices is for the blocks, not the individual matrix elements  */
233   PetscCall(PetscStrbeginswith(da->mattype, MATBAIJ, &isBAIJ));
234   if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATMPIBAIJ, &isBAIJ));
235   if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATSEQBAIJ, &isBAIJ));
236   if (isBAIJ) {
237     dd->w  = 1;
238     dd->xs = dd->xs / nc;
239     dd->xe = dd->xe / nc;
240     dd->Xs = dd->Xs / nc;
241     dd->Xe = dd->Xe / nc;
242   }
243 
244   /*
245      We do not provide a getcoloring function in the DMDA operations because
246    the basic DMDA does not know about matrices. We think of DMDA as being
247    more low-level then matrices.
248   */
249   if (dim == 1) PetscCall(DMCreateColoring_DA_1d_MPIAIJ(da, ctype, coloring));
250   else if (dim == 2) PetscCall(DMCreateColoring_DA_2d_MPIAIJ(da, ctype, coloring));
251   else if (dim == 3) PetscCall(DMCreateColoring_DA_3d_MPIAIJ(da, ctype, coloring));
252   else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not done for %" PetscInt_FMT " dimension, send us mail petsc-maint@mcs.anl.gov for code", dim);
253   if (isBAIJ) {
254     dd->w  = nc;
255     dd->xs = dd->xs * nc;
256     dd->xe = dd->xe * nc;
257     dd->Xs = dd->Xs * nc;
258     dd->Xe = dd->Xe * nc;
259   }
260   PetscFunctionReturn(PETSC_SUCCESS);
261 }
262 
263 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
264 {
265   PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, M, N, dim, s, k, nc, col;
266   PetscInt         ncolors = 0;
267   MPI_Comm         comm;
268   DMBoundaryType   bx, by;
269   DMDAStencilType  st;
270   ISColoringValue *colors;
271   DM_DA           *dd = (DM_DA *)da->data;
272 
273   PetscFunctionBegin;
274   /*
275          nc - number of components per grid point
276          col - number of colors needed in one direction for single component problem
277 
278   */
279   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
280   col = 2 * s + 1;
281   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
282   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
283   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
284 
285   /* special case as taught to us by Paul Hovland */
286   if (st == DMDA_STENCIL_STAR && s == 1) {
287     PetscCall(DMCreateColoring_DA_2d_5pt_MPIAIJ(da, ctype, coloring));
288   } else {
289     if (ctype == IS_COLORING_GLOBAL) {
290       if (!dd->localcoloring) {
291         PetscCall(PetscMalloc1(nc * nx * ny, &colors));
292         ii = 0;
293         for (j = ys; j < ys + ny; j++) {
294           for (i = xs; i < xs + nx; i++) {
295             for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((i % col) + col * (j % col));
296           }
297         }
298         ncolors = nc + nc * (col - 1 + col * (col - 1));
299         PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
300       }
301       *coloring = dd->localcoloring;
302     } else if (ctype == IS_COLORING_LOCAL) {
303       if (!dd->ghostedcoloring) {
304         PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
305         ii = 0;
306         for (j = gys; j < gys + gny; j++) {
307           for (i = gxs; i < gxs + gnx; i++) {
308             for (k = 0; k < nc; k++) {
309               /* the complicated stuff is to handle periodic boundaries */
310               colors[ii++] = k + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col));
311             }
312           }
313         }
314         ncolors = nc + nc * (col - 1 + col * (col - 1));
315         PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
316         /* PetscIntView(ncolors,(PetscInt*)colors,0); */
317 
318         PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
319       }
320       *coloring = dd->ghostedcoloring;
321     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
322   }
323   PetscCall(ISColoringReference(*coloring));
324   PetscFunctionReturn(PETSC_SUCCESS);
325 }
326 
327 PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
328 {
329   PetscInt         xs, ys, nx, ny, i, j, gxs, gys, gnx, gny, m, n, p, dim, s, k, nc, col, zs, gzs, ii, l, nz, gnz, M, N, P;
330   PetscInt         ncolors;
331   MPI_Comm         comm;
332   DMBoundaryType   bx, by, bz;
333   DMDAStencilType  st;
334   ISColoringValue *colors;
335   DM_DA           *dd = (DM_DA *)da->data;
336 
337   PetscFunctionBegin;
338   /*
339          nc - number of components per grid point
340          col - number of colors needed in one direction for single component problem
341   */
342   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
343   col = 2 * s + 1;
344   PetscCheck(bx != DM_BOUNDARY_PERIODIC || (m % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in X is divisible by 2*stencil_width + 1");
345   PetscCheck(by != DM_BOUNDARY_PERIODIC || (n % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Y is divisible by 2*stencil_width + 1");
346   PetscCheck(bz != DM_BOUNDARY_PERIODIC || (p % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Z is divisible by 2*stencil_width + 1");
347 
348   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
349   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
350   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
351 
352   /* create the coloring */
353   if (ctype == IS_COLORING_GLOBAL) {
354     if (!dd->localcoloring) {
355       PetscCall(PetscMalloc1(nc * nx * ny * nz, &colors));
356       ii = 0;
357       for (k = zs; k < zs + nz; k++) {
358         for (j = ys; j < ys + ny; j++) {
359           for (i = xs; i < xs + nx; i++) {
360             for (l = 0; l < nc; l++) colors[ii++] = l + nc * ((i % col) + col * (j % col) + col * col * (k % col));
361           }
362         }
363       }
364       ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
365       PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny * nz, colors, PETSC_OWN_POINTER, &dd->localcoloring));
366     }
367     *coloring = dd->localcoloring;
368   } else if (ctype == IS_COLORING_LOCAL) {
369     if (!dd->ghostedcoloring) {
370       PetscCall(PetscMalloc1(nc * gnx * gny * gnz, &colors));
371       ii = 0;
372       for (k = gzs; k < gzs + gnz; k++) {
373         for (j = gys; j < gys + gny; j++) {
374           for (i = gxs; i < gxs + gnx; i++) {
375             for (l = 0; l < nc; l++) {
376               /* the complicated stuff is to handle periodic boundaries */
377               colors[ii++] = l + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col) + col * col * (SetInRange(k, p) % col));
378             }
379           }
380         }
381       }
382       ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
383       PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny * gnz, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
384       PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
385     }
386     *coloring = dd->ghostedcoloring;
387   } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
388   PetscCall(ISColoringReference(*coloring));
389   PetscFunctionReturn(PETSC_SUCCESS);
390 }
391 
392 PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
393 {
394   PetscInt         xs, nx, i, i1, gxs, gnx, l, m, M, dim, s, nc, col;
395   PetscInt         ncolors;
396   MPI_Comm         comm;
397   DMBoundaryType   bx;
398   ISColoringValue *colors;
399   DM_DA           *dd = (DM_DA *)da->data;
400 
401   PetscFunctionBegin;
402   /*
403          nc - number of components per grid point
404          col - number of colors needed in one direction for single component problem
405   */
406   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, &M, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
407   col = 2 * s + 1;
408   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
409   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
410   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
411 
412   /* create the coloring */
413   if (ctype == IS_COLORING_GLOBAL) {
414     if (!dd->localcoloring) {
415       PetscCall(PetscMalloc1(nc * nx, &colors));
416       if (dd->ofillcols) {
417         PetscInt tc = 0;
418         for (i = 0; i < nc; i++) tc += (PetscInt)(dd->ofillcols[i] > 0);
419         i1 = 0;
420         for (i = xs; i < xs + nx; i++) {
421           for (l = 0; l < nc; l++) {
422             if (dd->ofillcols[l] && (i % col)) {
423               colors[i1++] = nc - 1 + tc * ((i % col) - 1) + dd->ofillcols[l];
424             } else {
425               colors[i1++] = l;
426             }
427           }
428         }
429         ncolors = nc + 2 * s * tc;
430       } else {
431         i1 = 0;
432         for (i = xs; i < xs + nx; i++) {
433           for (l = 0; l < nc; l++) colors[i1++] = l + nc * (i % col);
434         }
435         ncolors = nc + nc * (col - 1);
436       }
437       PetscCall(ISColoringCreate(comm, ncolors, nc * nx, colors, PETSC_OWN_POINTER, &dd->localcoloring));
438     }
439     *coloring = dd->localcoloring;
440   } else if (ctype == IS_COLORING_LOCAL) {
441     if (!dd->ghostedcoloring) {
442       PetscCall(PetscMalloc1(nc * gnx, &colors));
443       i1 = 0;
444       for (i = gxs; i < gxs + gnx; i++) {
445         for (l = 0; l < nc; l++) {
446           /* the complicated stuff is to handle periodic boundaries */
447           colors[i1++] = l + nc * (SetInRange(i, m) % col);
448         }
449       }
450       ncolors = nc + nc * (col - 1);
451       PetscCall(ISColoringCreate(comm, ncolors, nc * gnx, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
452       PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
453     }
454     *coloring = dd->ghostedcoloring;
455   } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
456   PetscCall(ISColoringReference(*coloring));
457   PetscFunctionReturn(PETSC_SUCCESS);
458 }
459 
460 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
461 {
462   PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, dim, s, k, nc;
463   PetscInt         ncolors;
464   MPI_Comm         comm;
465   DMBoundaryType   bx, by;
466   ISColoringValue *colors;
467   DM_DA           *dd = (DM_DA *)da->data;
468 
469   PetscFunctionBegin;
470   /*
471          nc - number of components per grid point
472          col - number of colors needed in one direction for single component problem
473   */
474   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, NULL));
475   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
476   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
477   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
478   /* create the coloring */
479   if (ctype == IS_COLORING_GLOBAL) {
480     if (!dd->localcoloring) {
481       PetscCall(PetscMalloc1(nc * nx * ny, &colors));
482       ii = 0;
483       for (j = ys; j < ys + ny; j++) {
484         for (i = xs; i < xs + nx; i++) {
485           for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * j + i) % 5);
486         }
487       }
488       ncolors = 5 * nc;
489       PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
490     }
491     *coloring = dd->localcoloring;
492   } else if (ctype == IS_COLORING_LOCAL) {
493     if (!dd->ghostedcoloring) {
494       PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
495       ii = 0;
496       for (j = gys; j < gys + gny; j++) {
497         for (i = gxs; i < gxs + gnx; i++) {
498           for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * SetInRange(j, n) + SetInRange(i, m)) % 5);
499         }
500       }
501       ncolors = 5 * nc;
502       PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
503       PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
504     }
505     *coloring = dd->ghostedcoloring;
506   } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
507   PetscFunctionReturn(PETSC_SUCCESS);
508 }
509 
510 /* =========================================================================== */
511 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM, Mat);
512 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM, Mat);
513 extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM, Mat);
514 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM, Mat);
515 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM, Mat);
516 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM, Mat);
517 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM, Mat);
518 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM, Mat);
519 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM, Mat);
520 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM, Mat);
521 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM, Mat);
522 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM, Mat);
523 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM, Mat);
524 extern PetscErrorCode DMCreateMatrix_DA_IS(DM, Mat);
525 
526 static PetscErrorCode MatView_MPI_DA(Mat A, PetscViewer viewer)
527 {
528   DM                da;
529   const char       *prefix;
530   Mat               AA, Anatural;
531   AO                ao;
532   PetscInt          rstart, rend, *petsc, i;
533   IS                is;
534   MPI_Comm          comm;
535   PetscViewerFormat format;
536   PetscBool         flag;
537 
538   PetscFunctionBegin;
539   /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
540   PetscCall(PetscViewerGetFormat(viewer, &format));
541   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(PETSC_SUCCESS);
542 
543   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
544   PetscCall(MatGetDM(A, &da));
545   PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");
546 
547   PetscCall(DMDAGetAO(da, &ao));
548   PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
549   PetscCall(PetscMalloc1(rend - rstart, &petsc));
550   for (i = rstart; i < rend; i++) petsc[i - rstart] = i;
551   PetscCall(AOApplicationToPetsc(ao, rend - rstart, petsc));
552   PetscCall(ISCreateGeneral(comm, rend - rstart, petsc, PETSC_OWN_POINTER, &is));
553 
554   /* call viewer on natural ordering */
555   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPISELL, &flag));
556   if (flag) {
557     PetscCall(MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &AA));
558     A = AA;
559   }
560   PetscCall(MatCreateSubMatrix(A, is, is, MAT_INITIAL_MATRIX, &Anatural));
561   PetscCall(ISDestroy(&is));
562   PetscCall(PetscObjectGetOptionsPrefix((PetscObject)A, &prefix));
563   PetscCall(PetscObjectSetOptionsPrefix((PetscObject)Anatural, prefix));
564   PetscCall(PetscObjectSetName((PetscObject)Anatural, ((PetscObject)A)->name));
565   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
566   PetscCall(MatView(Anatural, viewer));
567   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
568   PetscCall(MatDestroy(&Anatural));
569   if (flag) PetscCall(MatDestroy(&AA));
570   PetscFunctionReturn(PETSC_SUCCESS);
571 }
572 
573 static PetscErrorCode MatLoad_MPI_DA(Mat A, PetscViewer viewer)
574 {
575   DM       da;
576   Mat      Anatural, Aapp;
577   AO       ao;
578   PetscInt rstart, rend, *app, i, m, n, M, N;
579   IS       is;
580   MPI_Comm comm;
581 
582   PetscFunctionBegin;
583   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
584   PetscCall(MatGetDM(A, &da));
585   PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");
586 
587   /* Load the matrix in natural ordering */
588   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Anatural));
589   PetscCall(MatSetType(Anatural, ((PetscObject)A)->type_name));
590   PetscCall(MatGetSize(A, &M, &N));
591   PetscCall(MatGetLocalSize(A, &m, &n));
592   PetscCall(MatSetSizes(Anatural, m, n, M, N));
593   PetscCall(MatLoad(Anatural, viewer));
594 
595   /* Map natural ordering to application ordering and create IS */
596   PetscCall(DMDAGetAO(da, &ao));
597   PetscCall(MatGetOwnershipRange(Anatural, &rstart, &rend));
598   PetscCall(PetscMalloc1(rend - rstart, &app));
599   for (i = rstart; i < rend; i++) app[i - rstart] = i;
600   PetscCall(AOPetscToApplication(ao, rend - rstart, app));
601   PetscCall(ISCreateGeneral(comm, rend - rstart, app, PETSC_OWN_POINTER, &is));
602 
603   /* Do permutation and replace header */
604   PetscCall(MatCreateSubMatrix(Anatural, is, is, MAT_INITIAL_MATRIX, &Aapp));
605   PetscCall(MatHeaderReplace(A, &Aapp));
606   PetscCall(ISDestroy(&is));
607   PetscCall(MatDestroy(&Anatural));
608   PetscFunctionReturn(PETSC_SUCCESS);
609 }
610 
611 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
612 {
613   PetscInt    dim, dof, nx, ny, nz, dims[3], starts[3], M, N, P;
614   Mat         A;
615   MPI_Comm    comm;
616   MatType     Atype;
617   MatType     mtype;
618   PetscMPIInt size;
619   DM_DA      *dd    = (DM_DA *)da->data;
620   void (*aij)(void) = NULL, (*baij)(void) = NULL, (*sbaij)(void) = NULL, (*sell)(void) = NULL, (*is)(void) = NULL;
621 
622   PetscFunctionBegin;
623   PetscCall(MatInitializePackage());
624   mtype = da->mattype;
625 
626   /*
627                                   m
628           ------------------------------------------------------
629          |                                                     |
630          |                                                     |
631          |               ----------------------                |
632          |               |                    |                |
633       n  |           ny  |                    |                |
634          |               |                    |                |
635          |               .---------------------                |
636          |             (xs,ys)     nx                          |
637          |            .                                        |
638          |         (gxs,gys)                                   |
639          |                                                     |
640           -----------------------------------------------------
641   */
642 
643   /*
644          nc - number of components per grid point
645          col - number of colors needed in one direction for single component problem
646   */
647   M   = dd->M;
648   N   = dd->N;
649   P   = dd->P;
650   dim = da->dim;
651   dof = dd->w;
652   /* PetscCall(DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); */
653   PetscCall(DMDAGetCorners(da, NULL, NULL, NULL, &nx, &ny, &nz));
654   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
655   PetscCall(MatCreate(comm, &A));
656   PetscCall(MatSetSizes(A, dof * nx * ny * nz, dof * nx * ny * nz, dof * M * N * P, dof * M * N * P));
657   PetscCall(MatSetType(A, mtype));
658   PetscCall(MatSetFromOptions(A));
659   if (dof * nx * ny * nz < da->bind_below) {
660     PetscCall(MatSetBindingPropagates(A, PETSC_TRUE));
661     PetscCall(MatBindToCPU(A, PETSC_TRUE));
662   }
663   PetscCall(MatSetDM(A, da));
664   if (da->structure_only) PetscCall(MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE));
665   PetscCall(MatGetType(A, &Atype));
666   /*
667      We do not provide a getmatrix function in the DMDA operations because
668    the basic DMDA does not know about matrices. We think of DMDA as being more
669    more low-level than matrices. This is kind of cheating but, cause sometimes
670    we think of DMDA has higher level than matrices.
671 
672      We could switch based on Atype (or mtype), but we do not since the
673    specialized setting routines depend only on the particular preallocation
674    details of the matrix, not the type itself.
675   */
676   if (!da->prealloc_skip) { // Flag is likely set when user intends to use MatSetPreallocationCOO()
677     PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJSetPreallocation_C", &aij));
678     if (!aij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJSetPreallocation_C", &aij));
679     if (!aij) {
680       PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIBAIJSetPreallocation_C", &baij));
681       if (!baij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", &baij));
682       if (!baij) {
683         PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISBAIJSetPreallocation_C", &sbaij));
684         if (!sbaij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSBAIJSetPreallocation_C", &sbaij));
685         if (!sbaij) {
686           PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", &sell));
687           if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSELLSetPreallocation_C", &sell));
688         }
689         if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatISSetPreallocation_C", &is));
690       }
691     }
692   }
693   if (aij) {
694     if (dim == 1) {
695       if (dd->ofill) {
696         PetscCall(DMCreateMatrix_DA_1d_MPIAIJ_Fill(da, A));
697       } else {
698         DMBoundaryType bx;
699         PetscMPIInt    size;
700         PetscCall(DMDAGetInfo(da, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &bx, NULL, NULL, NULL));
701         PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));
702         if (size == 1 && bx == DM_BOUNDARY_NONE) {
703           PetscCall(DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da, A));
704         } else {
705           PetscCall(DMCreateMatrix_DA_1d_MPIAIJ(da, A));
706         }
707       }
708     } else if (dim == 2) {
709       if (dd->ofill) {
710         PetscCall(DMCreateMatrix_DA_2d_MPIAIJ_Fill(da, A));
711       } else {
712         PetscCall(DMCreateMatrix_DA_2d_MPIAIJ(da, A));
713       }
714     } else if (dim == 3) {
715       if (dd->ofill) {
716         PetscCall(DMCreateMatrix_DA_3d_MPIAIJ_Fill(da, A));
717       } else {
718         PetscCall(DMCreateMatrix_DA_3d_MPIAIJ(da, A));
719       }
720     }
721   } else if (baij) {
722     if (dim == 2) {
723       PetscCall(DMCreateMatrix_DA_2d_MPIBAIJ(da, A));
724     } else if (dim == 3) {
725       PetscCall(DMCreateMatrix_DA_3d_MPIBAIJ(da, A));
726     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
727   } else if (sbaij) {
728     if (dim == 2) {
729       PetscCall(DMCreateMatrix_DA_2d_MPISBAIJ(da, A));
730     } else if (dim == 3) {
731       PetscCall(DMCreateMatrix_DA_3d_MPISBAIJ(da, A));
732     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
733   } else if (sell) {
734     if (dim == 2) {
735       PetscCall(DMCreateMatrix_DA_2d_MPISELL(da, A));
736     } else if (dim == 3) {
737       PetscCall(DMCreateMatrix_DA_3d_MPISELL(da, A));
738     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
739   } else if (is) {
740     PetscCall(DMCreateMatrix_DA_IS(da, A));
741   } else { // unknown type or da->prealloc_skip so structural information may be needed, but no prealloc
742     ISLocalToGlobalMapping ltog;
743 
744     PetscCall(MatSetBlockSize(A, dof));
745     PetscCall(MatSetUp(A));
746     PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
747     PetscCall(MatSetLocalToGlobalMapping(A, ltog, ltog));
748   }
749   PetscCall(DMDAGetGhostCorners(da, &starts[0], &starts[1], &starts[2], &dims[0], &dims[1], &dims[2]));
750   PetscCall(MatSetStencil(A, dim, dims, starts, dof));
751   PetscCall(MatSetDM(A, da));
752   PetscCallMPI(MPI_Comm_size(comm, &size));
753   if (size > 1) {
754     /* change viewer to display matrix in natural ordering */
755     PetscCall(MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA));
756     PetscCall(MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA));
757   }
758   *J = A;
759   PetscFunctionReturn(PETSC_SUCCESS);
760 }
761 
762 PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]);
763 
764 PetscErrorCode DMCreateMatrix_DA_IS(DM dm, Mat J)
765 {
766   DM_DA                 *da = (DM_DA *)dm->data;
767   Mat                    lJ, P;
768   ISLocalToGlobalMapping ltog;
769   IS                     is;
770   PetscBT                bt;
771   const PetscInt        *e_loc, *idx;
772   PetscInt               i, nel, nen, nv, dof, *gidx, n, N;
773 
774   /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
775      We need to filter out the local indices that are not represented through the DMDAGetElements decomposition */
776   PetscFunctionBegin;
777   dof = da->w;
778   PetscCall(MatSetBlockSize(J, dof));
779   PetscCall(DMGetLocalToGlobalMapping(dm, &ltog));
780 
781   /* flag local elements indices in local DMDA numbering */
782   PetscCall(ISLocalToGlobalMappingGetSize(ltog, &nv));
783   PetscCall(PetscBTCreate(nv / dof, &bt));
784   PetscCall(DMDAGetElements(dm, &nel, &nen, &e_loc)); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
785   for (i = 0; i < nel * nen; i++) PetscCall(PetscBTSet(bt, e_loc[i]));
786 
787   /* filter out (set to -1) the global indices not used by the local elements */
788   PetscCall(PetscMalloc1(nv / dof, &gidx));
789   PetscCall(ISLocalToGlobalMappingGetBlockIndices(ltog, &idx));
790   PetscCall(PetscArraycpy(gidx, idx, nv / dof));
791   PetscCall(ISLocalToGlobalMappingRestoreBlockIndices(ltog, &idx));
792   for (i = 0; i < nv / dof; i++)
793     if (!PetscBTLookup(bt, i)) gidx[i] = -1;
794   PetscCall(PetscBTDestroy(&bt));
795   PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)dm), dof, nv / dof, gidx, PETSC_OWN_POINTER, &is));
796   PetscCall(ISLocalToGlobalMappingCreateIS(is, &ltog));
797   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
798   PetscCall(ISLocalToGlobalMappingDestroy(&ltog));
799   PetscCall(ISDestroy(&is));
800 
801   /* Preallocation */
802   if (dm->prealloc_skip) {
803     PetscCall(MatSetUp(J));
804   } else {
805     PetscCall(MatISGetLocalMat(J, &lJ));
806     PetscCall(MatGetLocalToGlobalMapping(lJ, &ltog, NULL));
807     PetscCall(MatCreate(PetscObjectComm((PetscObject)lJ), &P));
808     PetscCall(MatSetType(P, MATPREALLOCATOR));
809     PetscCall(MatSetLocalToGlobalMapping(P, ltog, ltog));
810     PetscCall(MatGetSize(lJ, &N, NULL));
811     PetscCall(MatGetLocalSize(lJ, &n, NULL));
812     PetscCall(MatSetSizes(P, n, n, N, N));
813     PetscCall(MatSetBlockSize(P, dof));
814     PetscCall(MatSetUp(P));
815     for (i = 0; i < nel; i++) PetscCall(MatSetValuesBlockedLocal(P, nen, e_loc + i * nen, nen, e_loc + i * nen, NULL, INSERT_VALUES));
816     PetscCall(MatPreallocatorPreallocate(P, (PetscBool)!da->prealloc_only, lJ));
817     PetscCall(MatISRestoreLocalMat(J, &lJ));
818     PetscCall(DMDARestoreElements(dm, &nel, &nen, &e_loc));
819     PetscCall(MatDestroy(&P));
820 
821     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
822     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
823   }
824   PetscFunctionReturn(PETSC_SUCCESS);
825 }
826 
827 PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da, Mat J)
828 {
829   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p;
830   PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
831   MPI_Comm               comm;
832   PetscScalar           *values;
833   DMBoundaryType         bx, by;
834   ISLocalToGlobalMapping ltog;
835   DMDAStencilType        st;
836 
837   PetscFunctionBegin;
838   /*
839          nc - number of components per grid point
840          col - number of colors needed in one direction for single component problem
841   */
842   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
843   col = 2 * s + 1;
844   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
845   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
846   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
847 
848   PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
849   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
850 
851   PetscCall(MatSetBlockSize(J, nc));
852   /* determine the matrix preallocation information */
853   MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
854   for (i = xs; i < xs + nx; i++) {
855     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
856     pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
857 
858     for (j = ys; j < ys + ny; j++) {
859       slot = i - gxs + gnx * (j - gys);
860 
861       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
862       lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
863 
864       cnt = 0;
865       for (k = 0; k < nc; k++) {
866         for (l = lstart; l < lend + 1; l++) {
867           for (p = pstart; p < pend + 1; p++) {
868             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
869               cols[cnt++] = k + nc * (slot + gnx * l + p);
870             }
871           }
872         }
873         rows[k] = k + nc * (slot);
874       }
875       PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
876     }
877   }
878   PetscCall(MatSetBlockSize(J, nc));
879   PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
880   PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
881   MatPreallocateEnd(dnz, onz);
882 
883   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
884 
885   /*
886     For each node in the grid: we get the neighbors in the local (on processor ordering
887     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
888     PETSc ordering.
889   */
890   if (!da->prealloc_only) {
891     PetscCall(PetscCalloc1(col * col * nc * nc, &values));
892     for (i = xs; i < xs + nx; i++) {
893       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
894       pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
895 
896       for (j = ys; j < ys + ny; j++) {
897         slot = i - gxs + gnx * (j - gys);
898 
899         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
900         lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
901 
902         cnt = 0;
903         for (k = 0; k < nc; k++) {
904           for (l = lstart; l < lend + 1; l++) {
905             for (p = pstart; p < pend + 1; p++) {
906               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
907                 cols[cnt++] = k + nc * (slot + gnx * l + p);
908               }
909             }
910           }
911           rows[k] = k + nc * (slot);
912         }
913         PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
914       }
915     }
916     PetscCall(PetscFree(values));
917     /* do not copy values to GPU since they are all zero and not yet needed there */
918     PetscCall(MatBindToCPU(J, PETSC_TRUE));
919     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
920     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
921     PetscCall(MatBindToCPU(J, PETSC_FALSE));
922     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
923   }
924   PetscCall(PetscFree2(rows, cols));
925   PetscFunctionReturn(PETSC_SUCCESS);
926 }
927 
928 PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da, Mat J)
929 {
930   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
931   PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
932   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
933   MPI_Comm               comm;
934   PetscScalar           *values;
935   DMBoundaryType         bx, by, bz;
936   ISLocalToGlobalMapping ltog;
937   DMDAStencilType        st;
938 
939   PetscFunctionBegin;
940   /*
941          nc - number of components per grid point
942          col - number of colors needed in one direction for single component problem
943   */
944   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
945   col = 2 * s + 1;
946   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
947   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
948   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
949 
950   PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
951   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
952 
953   PetscCall(MatSetBlockSize(J, nc));
954   /* determine the matrix preallocation information */
955   MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
956   for (i = xs; i < xs + nx; i++) {
957     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
958     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
959     for (j = ys; j < ys + ny; j++) {
960       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
961       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
962       for (k = zs; k < zs + nz; k++) {
963         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
964         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
965 
966         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
967 
968         cnt = 0;
969         for (l = 0; l < nc; l++) {
970           for (ii = istart; ii < iend + 1; ii++) {
971             for (jj = jstart; jj < jend + 1; jj++) {
972               for (kk = kstart; kk < kend + 1; kk++) {
973                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
974                   cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
975                 }
976               }
977             }
978           }
979           rows[l] = l + nc * (slot);
980         }
981         PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
982       }
983     }
984   }
985   PetscCall(MatSetBlockSize(J, nc));
986   PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
987   PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
988   MatPreallocateEnd(dnz, onz);
989   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
990 
991   /*
992     For each node in the grid: we get the neighbors in the local (on processor ordering
993     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
994     PETSc ordering.
995   */
996   if (!da->prealloc_only) {
997     PetscCall(PetscCalloc1(col * col * col * nc * nc * nc, &values));
998     for (i = xs; i < xs + nx; i++) {
999       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1000       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1001       for (j = ys; j < ys + ny; j++) {
1002         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1003         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1004         for (k = zs; k < zs + nz; k++) {
1005           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1006           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1007 
1008           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1009 
1010           cnt = 0;
1011           for (l = 0; l < nc; l++) {
1012             for (ii = istart; ii < iend + 1; ii++) {
1013               for (jj = jstart; jj < jend + 1; jj++) {
1014                 for (kk = kstart; kk < kend + 1; kk++) {
1015                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1016                     cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
1017                   }
1018                 }
1019               }
1020             }
1021             rows[l] = l + nc * (slot);
1022           }
1023           PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
1024         }
1025       }
1026     }
1027     PetscCall(PetscFree(values));
1028     /* do not copy values to GPU since they are all zero and not yet needed there */
1029     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1030     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1031     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1032     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1033     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1034   }
1035   PetscCall(PetscFree2(rows, cols));
1036   PetscFunctionReturn(PETSC_SUCCESS);
1037 }
1038 
1039 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da, Mat J)
1040 {
1041   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, M, N;
1042   PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
1043   MPI_Comm               comm;
1044   DMBoundaryType         bx, by;
1045   ISLocalToGlobalMapping ltog, mltog;
1046   DMDAStencilType        st;
1047   PetscBool              removedups = PETSC_FALSE, alreadyboundtocpu = PETSC_TRUE;
1048 
1049   PetscFunctionBegin;
1050   /*
1051          nc - number of components per grid point
1052          col - number of colors needed in one direction for single component problem
1053   */
1054   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
1055   if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1056   col = 2 * s + 1;
1057   /*
1058        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1059        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1060   */
1061   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1062   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1063   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1064   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1065   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1066 
1067   PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
1068   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1069 
1070   PetscCall(MatSetBlockSize(J, nc));
1071   /* determine the matrix preallocation information */
1072   MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
1073   for (i = xs; i < xs + nx; i++) {
1074     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1075     pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1076 
1077     for (j = ys; j < ys + ny; j++) {
1078       slot = i - gxs + gnx * (j - gys);
1079 
1080       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1081       lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1082 
1083       cnt = 0;
1084       for (k = 0; k < nc; k++) {
1085         for (l = lstart; l < lend + 1; l++) {
1086           for (p = pstart; p < pend + 1; p++) {
1087             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
1088               cols[cnt++] = k + nc * (slot + gnx * l + p);
1089             }
1090           }
1091         }
1092         rows[k] = k + nc * (slot);
1093       }
1094       if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1095       else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1096     }
1097   }
1098   PetscCall(MatSetBlockSize(J, nc));
1099   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
1100   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
1101   MatPreallocateEnd(dnz, onz);
1102   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1103   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1104 
1105   /*
1106     For each node in the grid: we get the neighbors in the local (on processor ordering
1107     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1108     PETSc ordering.
1109   */
1110   if (!da->prealloc_only) {
1111     for (i = xs; i < xs + nx; i++) {
1112       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1113       pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1114 
1115       for (j = ys; j < ys + ny; j++) {
1116         slot = i - gxs + gnx * (j - gys);
1117 
1118         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1119         lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1120 
1121         cnt = 0;
1122         for (l = lstart; l < lend + 1; l++) {
1123           for (p = pstart; p < pend + 1; p++) {
1124             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
1125               cols[cnt++] = nc * (slot + gnx * l + p);
1126               for (k = 1; k < nc; k++) {
1127                 cols[cnt] = 1 + cols[cnt - 1];
1128                 cnt++;
1129               }
1130             }
1131           }
1132         }
1133         for (k = 0; k < nc; k++) rows[k] = k + nc * (slot);
1134         PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1135       }
1136     }
1137     /* do not copy values to GPU since they are all zero and not yet needed there */
1138     PetscCall(MatBoundToCPU(J, &alreadyboundtocpu));
1139     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1140     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1141     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1142     if (!alreadyboundtocpu) PetscCall(MatBindToCPU(J, PETSC_FALSE));
1143     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1144     if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1145   }
1146   PetscCall(PetscFree2(rows, cols));
1147   PetscFunctionReturn(PETSC_SUCCESS);
1148 }
1149 
1150 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da, Mat J)
1151 {
1152   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1153   PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, M, N;
1154   PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
1155   DM_DA                 *dd = (DM_DA *)da->data;
1156   PetscInt               ifill_col, *ofill = dd->ofill, *dfill = dd->dfill;
1157   MPI_Comm               comm;
1158   DMBoundaryType         bx, by;
1159   ISLocalToGlobalMapping ltog;
1160   DMDAStencilType        st;
1161   PetscBool              removedups = PETSC_FALSE;
1162 
1163   PetscFunctionBegin;
1164   /*
1165          nc - number of components per grid point
1166          col - number of colors needed in one direction for single component problem
1167   */
1168   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
1169   col = 2 * s + 1;
1170   /*
1171        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1172        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1173   */
1174   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1175   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1176   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1177   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1178   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1179 
1180   PetscCall(PetscMalloc1(col * col * nc, &cols));
1181   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1182 
1183   PetscCall(MatSetBlockSize(J, nc));
1184   /* determine the matrix preallocation information */
1185   MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
1186   for (i = xs; i < xs + nx; i++) {
1187     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1188     pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1189 
1190     for (j = ys; j < ys + ny; j++) {
1191       slot = i - gxs + gnx * (j - gys);
1192 
1193       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1194       lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1195 
1196       for (k = 0; k < nc; k++) {
1197         cnt = 0;
1198         for (l = lstart; l < lend + 1; l++) {
1199           for (p = pstart; p < pend + 1; p++) {
1200             if (l || p) {
1201               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
1202                 for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
1203               }
1204             } else {
1205               if (dfill) {
1206                 for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
1207               } else {
1208                 for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
1209               }
1210             }
1211           }
1212         }
1213         row    = k + nc * (slot);
1214         maxcnt = PetscMax(maxcnt, cnt);
1215         if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
1216         else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
1217       }
1218     }
1219   }
1220   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
1221   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
1222   MatPreallocateEnd(dnz, onz);
1223   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1224 
1225   /*
1226     For each node in the grid: we get the neighbors in the local (on processor ordering
1227     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1228     PETSc ordering.
1229   */
1230   if (!da->prealloc_only) {
1231     for (i = xs; i < xs + nx; i++) {
1232       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1233       pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1234 
1235       for (j = ys; j < ys + ny; j++) {
1236         slot = i - gxs + gnx * (j - gys);
1237 
1238         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1239         lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1240 
1241         for (k = 0; k < nc; k++) {
1242           cnt = 0;
1243           for (l = lstart; l < lend + 1; l++) {
1244             for (p = pstart; p < pend + 1; p++) {
1245               if (l || p) {
1246                 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
1247                   for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
1248                 }
1249               } else {
1250                 if (dfill) {
1251                   for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
1252                 } else {
1253                   for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
1254                 }
1255               }
1256             }
1257           }
1258           row = k + nc * (slot);
1259           PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1260         }
1261       }
1262     }
1263     /* do not copy values to GPU since they are all zero and not yet needed there */
1264     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1265     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1266     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1267     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1268     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1269   }
1270   PetscCall(PetscFree(cols));
1271   PetscFunctionReturn(PETSC_SUCCESS);
1272 }
1273 
1274 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da, Mat J)
1275 {
1276   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1277   PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
1278   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
1279   MPI_Comm               comm;
1280   DMBoundaryType         bx, by, bz;
1281   ISLocalToGlobalMapping ltog, mltog;
1282   DMDAStencilType        st;
1283   PetscBool              removedups = PETSC_FALSE;
1284 
1285   PetscFunctionBegin;
1286   /*
1287          nc - number of components per grid point
1288          col - number of colors needed in one direction for single component problem
1289   */
1290   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
1291   if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1292   col = 2 * s + 1;
1293 
1294   /*
1295        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1296        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1297   */
1298   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1299   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1300   if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;
1301 
1302   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
1303   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
1304   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1305 
1306   PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
1307   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1308 
1309   PetscCall(MatSetBlockSize(J, nc));
1310   /* determine the matrix preallocation information */
1311   MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
1312   for (i = xs; i < xs + nx; i++) {
1313     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1314     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1315     for (j = ys; j < ys + ny; j++) {
1316       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1317       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1318       for (k = zs; k < zs + nz; k++) {
1319         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1320         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1321 
1322         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1323 
1324         cnt = 0;
1325         for (l = 0; l < nc; l++) {
1326           for (ii = istart; ii < iend + 1; ii++) {
1327             for (jj = jstart; jj < jend + 1; jj++) {
1328               for (kk = kstart; kk < kend + 1; kk++) {
1329                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1330                   cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
1331                 }
1332               }
1333             }
1334           }
1335           rows[l] = l + nc * (slot);
1336         }
1337         if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1338         else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1339       }
1340     }
1341   }
1342   PetscCall(MatSetBlockSize(J, nc));
1343   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
1344   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
1345   MatPreallocateEnd(dnz, onz);
1346   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1347   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1348 
1349   /*
1350     For each node in the grid: we get the neighbors in the local (on processor ordering
1351     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1352     PETSc ordering.
1353   */
1354   if (!da->prealloc_only) {
1355     for (i = xs; i < xs + nx; i++) {
1356       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1357       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1358       for (j = ys; j < ys + ny; j++) {
1359         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1360         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1361         for (k = zs; k < zs + nz; k++) {
1362           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1363           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1364 
1365           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1366 
1367           cnt = 0;
1368           for (kk = kstart; kk < kend + 1; kk++) {
1369             for (jj = jstart; jj < jend + 1; jj++) {
1370               for (ii = istart; ii < iend + 1; ii++) {
1371                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1372                   cols[cnt++] = nc * (slot + ii + gnx * jj + gnx * gny * kk);
1373                   for (l = 1; l < nc; l++) {
1374                     cols[cnt] = 1 + cols[cnt - 1];
1375                     cnt++;
1376                   }
1377                 }
1378               }
1379             }
1380           }
1381           rows[0] = nc * (slot);
1382           for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1383           PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1384         }
1385       }
1386     }
1387     /* do not copy values to GPU since they are all zero and not yet needed there */
1388     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1389     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1390     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1391     if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1392     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1393     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1394   }
1395   PetscCall(PetscFree2(rows, cols));
1396   PetscFunctionReturn(PETSC_SUCCESS);
1397 }
1398 
1399 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da, Mat J)
1400 {
1401   DM_DA                 *dd = (DM_DA *)da->data;
1402   PetscInt               xs, nx, i, j, gxs, gnx, row, k, l;
1403   PetscInt               m, dim, s, *cols = NULL, nc, cnt, maxcnt = 0, *ocols;
1404   PetscInt              *ofill = dd->ofill, *dfill = dd->dfill;
1405   DMBoundaryType         bx;
1406   ISLocalToGlobalMapping ltog;
1407   PetscMPIInt            rank, size;
1408 
1409   PetscFunctionBegin;
1410   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)da), &rank));
1411   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));
1412 
1413   /*
1414          nc - number of components per grid point
1415   */
1416   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1417   PetscCheck(s <= 1, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Matrix creation for 1d not implemented correctly for stencil width larger than 1");
1418   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1419   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1420 
1421   PetscCall(MatSetBlockSize(J, nc));
1422   PetscCall(PetscCalloc2(nx * nc, &cols, nx * nc, &ocols));
1423 
1424   /*
1425         note should be smaller for first and last process with no periodic
1426         does not handle dfill
1427   */
1428   cnt = 0;
1429   /* coupling with process to the left */
1430   for (i = 0; i < s; i++) {
1431     for (j = 0; j < nc; j++) {
1432       ocols[cnt] = ((rank == 0) ? 0 : (s - i) * (ofill[j + 1] - ofill[j]));
1433       cols[cnt]  = dfill[j + 1] - dfill[j] + (s + i) * (ofill[j + 1] - ofill[j]);
1434       if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1435         if (size > 1) ocols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
1436         else cols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
1437       }
1438       maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1439       cnt++;
1440     }
1441   }
1442   for (i = s; i < nx - s; i++) {
1443     for (j = 0; j < nc; j++) {
1444       cols[cnt] = dfill[j + 1] - dfill[j] + 2 * s * (ofill[j + 1] - ofill[j]);
1445       maxcnt    = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1446       cnt++;
1447     }
1448   }
1449   /* coupling with process to the right */
1450   for (i = nx - s; i < nx; i++) {
1451     for (j = 0; j < nc; j++) {
1452       ocols[cnt] = ((rank == (size - 1)) ? 0 : (i - nx + s + 1) * (ofill[j + 1] - ofill[j]));
1453       cols[cnt]  = dfill[j + 1] - dfill[j] + (s + nx - i - 1) * (ofill[j + 1] - ofill[j]);
1454       if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1455         if (size > 1) ocols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
1456         else cols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
1457       }
1458       maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1459       cnt++;
1460     }
1461   }
1462 
1463   PetscCall(MatSeqAIJSetPreallocation(J, 0, cols));
1464   PetscCall(MatMPIAIJSetPreallocation(J, 0, cols, 0, ocols));
1465   PetscCall(PetscFree2(cols, ocols));
1466 
1467   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1468   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1469 
1470   /*
1471     For each node in the grid: we get the neighbors in the local (on processor ordering
1472     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1473     PETSc ordering.
1474   */
1475   if (!da->prealloc_only) {
1476     PetscCall(PetscMalloc1(maxcnt, &cols));
1477     row = xs * nc;
1478     /* coupling with process to the left */
1479     for (i = xs; i < xs + s; i++) {
1480       for (j = 0; j < nc; j++) {
1481         cnt = 0;
1482         if (rank) {
1483           for (l = 0; l < s; l++) {
1484             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1485           }
1486         }
1487         if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1488           for (l = 0; l < s; l++) {
1489             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (m + i - s - l) * nc + ofill[k];
1490           }
1491         }
1492         if (dfill) {
1493           for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1494         } else {
1495           for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1496         }
1497         for (l = 0; l < s; l++) {
1498           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1499         }
1500         PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1501         row++;
1502       }
1503     }
1504     for (i = xs + s; i < xs + nx - s; i++) {
1505       for (j = 0; j < nc; j++) {
1506         cnt = 0;
1507         for (l = 0; l < s; l++) {
1508           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1509         }
1510         if (dfill) {
1511           for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1512         } else {
1513           for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1514         }
1515         for (l = 0; l < s; l++) {
1516           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1517         }
1518         PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1519         row++;
1520       }
1521     }
1522     /* coupling with process to the right */
1523     for (i = xs + nx - s; i < xs + nx; i++) {
1524       for (j = 0; j < nc; j++) {
1525         cnt = 0;
1526         for (l = 0; l < s; l++) {
1527           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1528         }
1529         if (dfill) {
1530           for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1531         } else {
1532           for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1533         }
1534         if (rank < size - 1) {
1535           for (l = 0; l < s; l++) {
1536             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1537           }
1538         }
1539         if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1540           for (l = 0; l < s; l++) {
1541             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s - l - m + 2) * nc + ofill[k];
1542           }
1543         }
1544         PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1545         row++;
1546       }
1547     }
1548     PetscCall(PetscFree(cols));
1549     /* do not copy values to GPU since they are all zero and not yet needed there */
1550     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1551     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1552     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1553     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1554     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1555   }
1556   PetscFunctionReturn(PETSC_SUCCESS);
1557 }
1558 
1559 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da, Mat J)
1560 {
1561   PetscInt               xs, nx, i, i1, slot, gxs, gnx;
1562   PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1563   PetscInt               istart, iend;
1564   DMBoundaryType         bx;
1565   ISLocalToGlobalMapping ltog, mltog;
1566 
1567   PetscFunctionBegin;
1568   /*
1569          nc - number of components per grid point
1570          col - number of colors needed in one direction for single component problem
1571   */
1572   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1573   if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1574   col = 2 * s + 1;
1575 
1576   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1577   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1578 
1579   PetscCall(MatSetBlockSize(J, nc));
1580   PetscCall(MatSeqAIJSetPreallocation(J, col * nc, NULL));
1581   PetscCall(MatMPIAIJSetPreallocation(J, col * nc, NULL, col * nc, NULL));
1582 
1583   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1584   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1585   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1586 
1587   /*
1588     For each node in the grid: we get the neighbors in the local (on processor ordering
1589     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1590     PETSc ordering.
1591   */
1592   if (!da->prealloc_only) {
1593     PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
1594     for (i = xs; i < xs + nx; i++) {
1595       istart = PetscMax(-s, gxs - i);
1596       iend   = PetscMin(s, gxs + gnx - i - 1);
1597       slot   = i - gxs;
1598 
1599       cnt = 0;
1600       for (i1 = istart; i1 < iend + 1; i1++) {
1601         cols[cnt++] = nc * (slot + i1);
1602         for (l = 1; l < nc; l++) {
1603           cols[cnt] = 1 + cols[cnt - 1];
1604           cnt++;
1605         }
1606       }
1607       rows[0] = nc * (slot);
1608       for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1609       PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1610     }
1611     /* do not copy values to GPU since they are all zero and not yet needed there */
1612     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1613     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1614     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1615     if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1616     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1617     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1618     PetscCall(PetscFree2(rows, cols));
1619   }
1620   PetscFunctionReturn(PETSC_SUCCESS);
1621 }
1622 
1623 PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da, Mat J)
1624 {
1625   PetscInt               xs, nx, i, i1, slot, gxs, gnx;
1626   PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1627   PetscInt               istart, iend;
1628   DMBoundaryType         bx;
1629   ISLocalToGlobalMapping ltog, mltog;
1630 
1631   PetscFunctionBegin;
1632   /*
1633          nc - number of components per grid point
1634          col - number of colors needed in one direction for single component problem
1635   */
1636   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1637   col = 2 * s + 1;
1638 
1639   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1640   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1641 
1642   PetscCall(MatSetBlockSize(J, nc));
1643   PetscCall(MatSeqAIJSetTotalPreallocation(J, nx * nc * col * nc));
1644 
1645   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1646   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1647   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1648 
1649   /*
1650     For each node in the grid: we get the neighbors in the local (on processor ordering
1651     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1652     PETSc ordering.
1653   */
1654   if (!da->prealloc_only) {
1655     PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
1656     for (i = xs; i < xs + nx; i++) {
1657       istart = PetscMax(-s, gxs - i);
1658       iend   = PetscMin(s, gxs + gnx - i - 1);
1659       slot   = i - gxs;
1660 
1661       cnt = 0;
1662       for (i1 = istart; i1 < iend + 1; i1++) {
1663         cols[cnt++] = nc * (slot + i1);
1664         for (l = 1; l < nc; l++) {
1665           cols[cnt] = 1 + cols[cnt - 1];
1666           cnt++;
1667         }
1668       }
1669       rows[0] = nc * (slot);
1670       for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1671       PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1672     }
1673     /* do not copy values to GPU since they are all zero and not yet needed there */
1674     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1675     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1676     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1677     if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1678     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1679     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1680     PetscCall(PetscFree2(rows, cols));
1681   }
1682   PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1683   PetscFunctionReturn(PETSC_SUCCESS);
1684 }
1685 
1686 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da, Mat J)
1687 {
1688   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1689   PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
1690   PetscInt               istart, iend, jstart, jend, ii, jj;
1691   MPI_Comm               comm;
1692   PetscScalar           *values;
1693   DMBoundaryType         bx, by;
1694   DMDAStencilType        st;
1695   ISLocalToGlobalMapping ltog;
1696 
1697   PetscFunctionBegin;
1698   /*
1699      nc - number of components per grid point
1700      col - number of colors needed in one direction for single component problem
1701   */
1702   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
1703   col = 2 * s + 1;
1704 
1705   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1706   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1707   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1708 
1709   PetscCall(PetscMalloc1(col * col * nc * nc, &cols));
1710 
1711   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1712 
1713   /* determine the matrix preallocation information */
1714   MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
1715   for (i = xs; i < xs + nx; i++) {
1716     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1717     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1718     for (j = ys; j < ys + ny; j++) {
1719       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1720       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1721       slot   = i - gxs + gnx * (j - gys);
1722 
1723       /* Find block columns in block row */
1724       cnt = 0;
1725       for (ii = istart; ii < iend + 1; ii++) {
1726         for (jj = jstart; jj < jend + 1; jj++) {
1727           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1728             cols[cnt++] = slot + ii + gnx * jj;
1729           }
1730         }
1731       }
1732       PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
1733     }
1734   }
1735   PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
1736   PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1737   MatPreallocateEnd(dnz, onz);
1738 
1739   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1740 
1741   /*
1742     For each node in the grid: we get the neighbors in the local (on processor ordering
1743     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1744     PETSc ordering.
1745   */
1746   if (!da->prealloc_only) {
1747     PetscCall(PetscCalloc1(col * col * nc * nc, &values));
1748     for (i = xs; i < xs + nx; i++) {
1749       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1750       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1751       for (j = ys; j < ys + ny; j++) {
1752         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1753         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1754         slot   = i - gxs + gnx * (j - gys);
1755         cnt    = 0;
1756         for (ii = istart; ii < iend + 1; ii++) {
1757           for (jj = jstart; jj < jend + 1; jj++) {
1758             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1759               cols[cnt++] = slot + ii + gnx * jj;
1760             }
1761           }
1762         }
1763         PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
1764       }
1765     }
1766     PetscCall(PetscFree(values));
1767     /* do not copy values to GPU since they are all zero and not yet needed there */
1768     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1769     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1770     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1771     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1772     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1773   }
1774   PetscCall(PetscFree(cols));
1775   PetscFunctionReturn(PETSC_SUCCESS);
1776 }
1777 
1778 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da, Mat J)
1779 {
1780   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1781   PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
1782   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
1783   MPI_Comm               comm;
1784   PetscScalar           *values;
1785   DMBoundaryType         bx, by, bz;
1786   DMDAStencilType        st;
1787   ISLocalToGlobalMapping ltog;
1788 
1789   PetscFunctionBegin;
1790   /*
1791          nc - number of components per grid point
1792          col - number of colors needed in one direction for single component problem
1793   */
1794   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
1795   col = 2 * s + 1;
1796 
1797   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
1798   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
1799   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1800 
1801   PetscCall(PetscMalloc1(col * col * col, &cols));
1802 
1803   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1804 
1805   /* determine the matrix preallocation information */
1806   MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
1807   for (i = xs; i < xs + nx; i++) {
1808     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1809     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1810     for (j = ys; j < ys + ny; j++) {
1811       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1812       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1813       for (k = zs; k < zs + nz; k++) {
1814         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1815         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1816 
1817         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1818 
1819         /* Find block columns in block row */
1820         cnt = 0;
1821         for (ii = istart; ii < iend + 1; ii++) {
1822           for (jj = jstart; jj < jend + 1; jj++) {
1823             for (kk = kstart; kk < kend + 1; kk++) {
1824               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1825                 cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
1826               }
1827             }
1828           }
1829         }
1830         PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
1831       }
1832     }
1833   }
1834   PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
1835   PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1836   MatPreallocateEnd(dnz, onz);
1837 
1838   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1839 
1840   /*
1841     For each node in the grid: we get the neighbors in the local (on processor ordering
1842     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1843     PETSc ordering.
1844   */
1845   if (!da->prealloc_only) {
1846     PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
1847     for (i = xs; i < xs + nx; i++) {
1848       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1849       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1850       for (j = ys; j < ys + ny; j++) {
1851         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1852         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1853         for (k = zs; k < zs + nz; k++) {
1854           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1855           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1856 
1857           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1858 
1859           cnt = 0;
1860           for (ii = istart; ii < iend + 1; ii++) {
1861             for (jj = jstart; jj < jend + 1; jj++) {
1862               for (kk = kstart; kk < kend + 1; kk++) {
1863                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1864                   cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
1865                 }
1866               }
1867             }
1868           }
1869           PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
1870         }
1871       }
1872     }
1873     PetscCall(PetscFree(values));
1874     /* do not copy values to GPU since they are all zero and not yet needed there */
1875     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1876     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1877     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1878     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1879     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1880   }
1881   PetscCall(PetscFree(cols));
1882   PetscFunctionReturn(PETSC_SUCCESS);
1883 }
1884 
1885 /*
1886   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
1887   identify in the local ordering with periodic domain.
1888 */
1889 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog, PetscInt *row, PetscInt *cnt, PetscInt col[])
1890 {
1891   PetscInt i, n;
1892 
1893   PetscFunctionBegin;
1894   PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, 1, row, row));
1895   PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, *cnt, col, col));
1896   for (i = 0, n = 0; i < *cnt; i++) {
1897     if (col[i] >= *row) col[n++] = col[i];
1898   }
1899   *cnt = n;
1900   PetscFunctionReturn(PETSC_SUCCESS);
1901 }
1902 
1903 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da, Mat J)
1904 {
1905   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1906   PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
1907   PetscInt               istart, iend, jstart, jend, ii, jj;
1908   MPI_Comm               comm;
1909   PetscScalar           *values;
1910   DMBoundaryType         bx, by;
1911   DMDAStencilType        st;
1912   ISLocalToGlobalMapping ltog;
1913 
1914   PetscFunctionBegin;
1915   /*
1916      nc - number of components per grid point
1917      col - number of colors needed in one direction for single component problem
1918   */
1919   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
1920   col = 2 * s + 1;
1921 
1922   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1923   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1924   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1925 
1926   PetscCall(PetscMalloc1(col * col * nc * nc, &cols));
1927 
1928   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1929 
1930   /* determine the matrix preallocation information */
1931   MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
1932   for (i = xs; i < xs + nx; i++) {
1933     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1934     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1935     for (j = ys; j < ys + ny; j++) {
1936       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1937       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1938       slot   = i - gxs + gnx * (j - gys);
1939 
1940       /* Find block columns in block row */
1941       cnt = 0;
1942       for (ii = istart; ii < iend + 1; ii++) {
1943         for (jj = jstart; jj < jend + 1; jj++) {
1944           if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
1945         }
1946       }
1947       PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
1948       PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
1949     }
1950   }
1951   PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
1952   PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1953   MatPreallocateEnd(dnz, onz);
1954 
1955   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1956 
1957   /*
1958     For each node in the grid: we get the neighbors in the local (on processor ordering
1959     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1960     PETSc ordering.
1961   */
1962   if (!da->prealloc_only) {
1963     PetscCall(PetscCalloc1(col * col * nc * nc, &values));
1964     for (i = xs; i < xs + nx; i++) {
1965       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1966       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1967       for (j = ys; j < ys + ny; j++) {
1968         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1969         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1970         slot   = i - gxs + gnx * (j - gys);
1971 
1972         /* Find block columns in block row */
1973         cnt = 0;
1974         for (ii = istart; ii < iend + 1; ii++) {
1975           for (jj = jstart; jj < jend + 1; jj++) {
1976             if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
1977           }
1978         }
1979         PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
1980         PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
1981       }
1982     }
1983     PetscCall(PetscFree(values));
1984     /* do not copy values to GPU since they are all zero and not yet needed there */
1985     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1986     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1987     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1988     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1989     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1990   }
1991   PetscCall(PetscFree(cols));
1992   PetscFunctionReturn(PETSC_SUCCESS);
1993 }
1994 
1995 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da, Mat J)
1996 {
1997   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1998   PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
1999   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
2000   MPI_Comm               comm;
2001   PetscScalar           *values;
2002   DMBoundaryType         bx, by, bz;
2003   DMDAStencilType        st;
2004   ISLocalToGlobalMapping ltog;
2005 
2006   PetscFunctionBegin;
2007   /*
2008      nc - number of components per grid point
2009      col - number of colors needed in one direction for single component problem
2010   */
2011   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
2012   col = 2 * s + 1;
2013 
2014   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
2015   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
2016   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
2017 
2018   /* create the matrix */
2019   PetscCall(PetscMalloc1(col * col * col, &cols));
2020 
2021   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
2022 
2023   /* determine the matrix preallocation information */
2024   MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
2025   for (i = xs; i < xs + nx; i++) {
2026     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2027     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2028     for (j = ys; j < ys + ny; j++) {
2029       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2030       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2031       for (k = zs; k < zs + nz; k++) {
2032         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2033         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2034 
2035         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2036 
2037         /* Find block columns in block row */
2038         cnt = 0;
2039         for (ii = istart; ii < iend + 1; ii++) {
2040           for (jj = jstart; jj < jend + 1; jj++) {
2041             for (kk = kstart; kk < kend + 1; kk++) {
2042               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
2043             }
2044           }
2045         }
2046         PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
2047         PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
2048       }
2049     }
2050   }
2051   PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
2052   PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
2053   MatPreallocateEnd(dnz, onz);
2054 
2055   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
2056 
2057   /*
2058     For each node in the grid: we get the neighbors in the local (on processor ordering
2059     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2060     PETSc ordering.
2061   */
2062   if (!da->prealloc_only) {
2063     PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
2064     for (i = xs; i < xs + nx; i++) {
2065       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2066       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2067       for (j = ys; j < ys + ny; j++) {
2068         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2069         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2070         for (k = zs; k < zs + nz; k++) {
2071           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2072           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2073 
2074           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2075 
2076           cnt = 0;
2077           for (ii = istart; ii < iend + 1; ii++) {
2078             for (jj = jstart; jj < jend + 1; jj++) {
2079               for (kk = kstart; kk < kend + 1; kk++) {
2080                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
2081               }
2082             }
2083           }
2084           PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
2085           PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
2086         }
2087       }
2088     }
2089     PetscCall(PetscFree(values));
2090     /* do not copy values to GPU since they are all zero and not yet needed there */
2091     PetscCall(MatBindToCPU(J, PETSC_TRUE));
2092     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
2093     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
2094     PetscCall(MatBindToCPU(J, PETSC_FALSE));
2095     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2096   }
2097   PetscCall(PetscFree(cols));
2098   PetscFunctionReturn(PETSC_SUCCESS);
2099 }
2100 
2101 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da, Mat J)
2102 {
2103   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
2104   PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, *dnz, *onz;
2105   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
2106   DM_DA                 *dd = (DM_DA *)da->data;
2107   PetscInt               ifill_col, *dfill = dd->dfill, *ofill = dd->ofill;
2108   MPI_Comm               comm;
2109   PetscScalar           *values;
2110   DMBoundaryType         bx, by, bz;
2111   ISLocalToGlobalMapping ltog;
2112   DMDAStencilType        st;
2113   PetscBool              removedups = PETSC_FALSE;
2114 
2115   PetscFunctionBegin;
2116   /*
2117          nc - number of components per grid point
2118          col - number of colors needed in one direction for single component problem
2119   */
2120   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
2121   col = 2 * s + 1;
2122 
2123   /*
2124        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
2125        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
2126   */
2127   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
2128   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
2129   if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;
2130 
2131   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
2132   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
2133   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
2134 
2135   PetscCall(PetscMalloc1(col * col * col * nc, &cols));
2136   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
2137 
2138   /* determine the matrix preallocation information */
2139   MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
2140 
2141   PetscCall(MatSetBlockSize(J, nc));
2142   for (i = xs; i < xs + nx; i++) {
2143     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2144     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2145     for (j = ys; j < ys + ny; j++) {
2146       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2147       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2148       for (k = zs; k < zs + nz; k++) {
2149         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2150         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2151 
2152         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2153 
2154         for (l = 0; l < nc; l++) {
2155           cnt = 0;
2156           for (ii = istart; ii < iend + 1; ii++) {
2157             for (jj = jstart; jj < jend + 1; jj++) {
2158               for (kk = kstart; kk < kend + 1; kk++) {
2159                 if (ii || jj || kk) {
2160                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
2161                     for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2162                   }
2163                 } else {
2164                   if (dfill) {
2165                     for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2166                   } else {
2167                     for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2168                   }
2169                 }
2170               }
2171             }
2172           }
2173           row    = l + nc * (slot);
2174           maxcnt = PetscMax(maxcnt, cnt);
2175           if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
2176           else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
2177         }
2178       }
2179     }
2180   }
2181   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
2182   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
2183   MatPreallocateEnd(dnz, onz);
2184   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
2185 
2186   /*
2187     For each node in the grid: we get the neighbors in the local (on processor ordering
2188     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2189     PETSc ordering.
2190   */
2191   if (!da->prealloc_only) {
2192     PetscCall(PetscCalloc1(maxcnt, &values));
2193     for (i = xs; i < xs + nx; i++) {
2194       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2195       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2196       for (j = ys; j < ys + ny; j++) {
2197         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2198         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2199         for (k = zs; k < zs + nz; k++) {
2200           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2201           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2202 
2203           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2204 
2205           for (l = 0; l < nc; l++) {
2206             cnt = 0;
2207             for (ii = istart; ii < iend + 1; ii++) {
2208               for (jj = jstart; jj < jend + 1; jj++) {
2209                 for (kk = kstart; kk < kend + 1; kk++) {
2210                   if (ii || jj || kk) {
2211                     if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
2212                       for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2213                     }
2214                   } else {
2215                     if (dfill) {
2216                       for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2217                     } else {
2218                       for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2219                     }
2220                   }
2221                 }
2222               }
2223             }
2224             row = l + nc * (slot);
2225             PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, values, INSERT_VALUES));
2226           }
2227         }
2228       }
2229     }
2230     PetscCall(PetscFree(values));
2231     /* do not copy values to GPU since they are all zero and not yet needed there */
2232     PetscCall(MatBindToCPU(J, PETSC_TRUE));
2233     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
2234     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
2235     PetscCall(MatBindToCPU(J, PETSC_FALSE));
2236     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2237   }
2238   PetscCall(PetscFree(cols));
2239   PetscFunctionReturn(PETSC_SUCCESS);
2240 }
2241