xref: /petsc/src/dm/impls/da/fdda.c (revision 5d7a6ebe9dde080aedbe86be0085708de8f97bb7)
1 #include <petsc/private/dmdaimpl.h> /*I      "petscdmda.h"     I*/
2 #include <petscmat.h>
3 #include <petscbt.h>
4 
5 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM, ISColoringType, ISColoring *);
6 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM, ISColoringType, ISColoring *);
7 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM, ISColoringType, ISColoring *);
8 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM, ISColoringType, ISColoring *);
9 
10 /*
11    For ghost i that may be negative or greater than the upper bound this
12   maps it into the 0:m-1 range using periodicity
13 */
14 #define SetInRange(i, m) ((i < 0) ? m + i : ((i >= m) ? i - m : i))
15 
16 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill, PetscInt w, PetscInt **rfill)
17 {
18   PetscInt i, j, nz, *fill;
19 
20   PetscFunctionBegin;
21   if (!dfill) PetscFunctionReturn(PETSC_SUCCESS);
22 
23   /* count number nonzeros */
24   nz = 0;
25   for (i = 0; i < w; i++) {
26     for (j = 0; j < w; j++) {
27       if (dfill[w * i + j]) nz++;
28     }
29   }
30   PetscCall(PetscMalloc1(nz + w + 1, &fill));
31   /* construct modified CSR storage of nonzero structure */
32   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
33    so fill[1] - fill[0] gives number of nonzeros in first row etc */
34   nz = w + 1;
35   for (i = 0; i < w; i++) {
36     fill[i] = nz;
37     for (j = 0; j < w; j++) {
38       if (dfill[w * i + j]) {
39         fill[nz] = j;
40         nz++;
41       }
42     }
43   }
44   fill[w] = nz;
45 
46   *rfill = fill;
47   PetscFunctionReturn(PETSC_SUCCESS);
48 }
49 
50 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse, PetscInt w, PetscInt **rfill)
51 {
52   PetscInt nz;
53 
54   PetscFunctionBegin;
55   if (!dfillsparse) PetscFunctionReturn(PETSC_SUCCESS);
56 
57   /* Determine number of non-zeros */
58   nz = (dfillsparse[w] - w - 1);
59 
60   /* Allocate space for our copy of the given sparse matrix representation. */
61   PetscCall(PetscMalloc1(nz + w + 1, rfill));
62   PetscCall(PetscArraycpy(*rfill, dfillsparse, nz + w + 1));
63   PetscFunctionReturn(PETSC_SUCCESS);
64 }
65 
66 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
67 {
68   PetscInt i, k, cnt = 1;
69 
70   PetscFunctionBegin;
71 
72   /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of
73    columns to the left with any nonzeros in them plus 1 */
74   PetscCall(PetscCalloc1(dd->w, &dd->ofillcols));
75   for (i = 0; i < dd->w; i++) {
76     for (k = dd->ofill[i]; k < dd->ofill[i + 1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
77   }
78   for (i = 0; i < dd->w; i++) {
79     if (dd->ofillcols[i]) dd->ofillcols[i] = cnt++;
80   }
81   PetscFunctionReturn(PETSC_SUCCESS);
82 }
83 
84 /*@
85   DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
86   of the matrix returned by `DMCreateMatrix()`.
87 
88   Logically Collective
89 
90   Input Parameters:
91 + da    - the distributed array
92 . dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
93 - ofill - the fill pattern in the off-diagonal blocks
94 
95   Level: developer
96 
97   Notes:
98   This only makes sense when you are doing multicomponent problems but using the
99   `MATMPIAIJ` matrix format
100 
101   The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
102   representing coupling and 0 entries for missing coupling. For example
103 .vb
104             dfill[9] = {1, 0, 0,
105                         1, 1, 0,
106                         0, 1, 1}
107 .ve
108   means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
109   itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
110   diagonal block).
111 
112   `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns then
113   can be represented in the dfill, ofill format
114 
115   Contributed by Glenn Hammond
116 
117 .seealso: `DM`, `DMDA`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
118 @*/
119 PetscErrorCode DMDASetBlockFills(DM da, const PetscInt *dfill, const PetscInt *ofill)
120 {
121   DM_DA *dd = (DM_DA *)da->data;
122 
123   PetscFunctionBegin;
124   /* save the given dfill and ofill information */
125   PetscCall(DMDASetBlockFills_Private(dfill, dd->w, &dd->dfill));
126   PetscCall(DMDASetBlockFills_Private(ofill, dd->w, &dd->ofill));
127 
128   /* count nonzeros in ofill columns */
129   PetscCall(DMDASetBlockFills_Private2(dd));
130   PetscFunctionReturn(PETSC_SUCCESS);
131 }
132 
133 /*@
134   DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
135   of the matrix returned by `DMCreateMatrix()`, using sparse representations
136   of fill patterns.
137 
138   Logically Collective
139 
140   Input Parameters:
141 + da          - the distributed array
142 . dfillsparse - the sparse fill pattern in the diagonal block (may be `NULL`, means use dense block)
143 - ofillsparse - the sparse fill pattern in the off-diagonal blocks
144 
145   Level: developer
146 
147   Notes:
148   This only makes sense when you are doing multicomponent problems but using the
149   `MATMPIAIJ` matrix format
150 
151   The format for `dfill` and `ofill` is a sparse representation of a
152   dof-by-dof matrix with 1 entries representing coupling and 0 entries
153   for missing coupling.  The sparse representation is a 1 dimensional
154   array of length nz + dof + 1, where nz is the number of non-zeros in
155   the matrix.  The first dof entries in the array give the
156   starting array indices of each row's items in the rest of the array,
157   the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
158   and the remaining nz items give the column indices of each of
159   the 1s within the logical 2D matrix.  Each row's items within
160   the array are the column indices of the 1s within that row
161   of the 2D matrix.  PETSc developers may recognize that this is the
162   same format as that computed by the `DMDASetBlockFills_Private()`
163   function from a dense 2D matrix representation.
164 
165   `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns then
166   can be represented in the `dfill`, `ofill` format
167 
168   Contributed by Philip C. Roth
169 
170 .seealso: `DM`, `DMDA`, `DMDASetBlockFills()`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
171 @*/
172 PetscErrorCode DMDASetBlockFillsSparse(DM da, const PetscInt *dfillsparse, const PetscInt *ofillsparse)
173 {
174   DM_DA *dd = (DM_DA *)da->data;
175 
176   PetscFunctionBegin;
177   /* save the given dfill and ofill information */
178   PetscCall(DMDASetBlockFillsSparse_Private(dfillsparse, dd->w, &dd->dfill));
179   PetscCall(DMDASetBlockFillsSparse_Private(ofillsparse, dd->w, &dd->ofill));
180 
181   /* count nonzeros in ofill columns */
182   PetscCall(DMDASetBlockFills_Private2(dd));
183   PetscFunctionReturn(PETSC_SUCCESS);
184 }
185 
186 PetscErrorCode DMCreateColoring_DA(DM da, ISColoringType ctype, ISColoring *coloring)
187 {
188   PetscInt       dim, m, n, p, nc;
189   DMBoundaryType bx, by, bz;
190   MPI_Comm       comm;
191   PetscMPIInt    size;
192   PetscBool      isBAIJ;
193   DM_DA         *dd = (DM_DA *)da->data;
194 
195   PetscFunctionBegin;
196   /*
197                                   m
198           ------------------------------------------------------
199          |                                                     |
200          |                                                     |
201          |               ----------------------                |
202          |               |                    |                |
203       n  |           yn  |                    |                |
204          |               |                    |                |
205          |               .---------------------                |
206          |             (xs,ys)     xn                          |
207          |            .                                        |
208          |         (gxs,gys)                                   |
209          |                                                     |
210           -----------------------------------------------------
211   */
212 
213   /*
214          nc - number of components per grid point
215          col - number of colors needed in one direction for single component problem
216 
217   */
218   PetscCall(DMDAGetInfo(da, &dim, NULL, NULL, NULL, &m, &n, &p, &nc, NULL, &bx, &by, &bz, NULL));
219 
220   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
221   PetscCallMPI(MPI_Comm_size(comm, &size));
222   if (ctype == IS_COLORING_LOCAL) {
223     if (size == 1) {
224       ctype = IS_COLORING_GLOBAL;
225     } else {
226       PetscCheck((dim == 1) || !((m == 1 && bx == DM_BOUNDARY_PERIODIC) || (n == 1 && by == DM_BOUNDARY_PERIODIC) || (p == 1 && bz == DM_BOUNDARY_PERIODIC)), PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain on the same process");
227     }
228   }
229 
230   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
231      matrices is for the blocks, not the individual matrix elements  */
232   PetscCall(PetscStrbeginswith(da->mattype, MATBAIJ, &isBAIJ));
233   if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATMPIBAIJ, &isBAIJ));
234   if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATSEQBAIJ, &isBAIJ));
235   if (isBAIJ) {
236     dd->w  = 1;
237     dd->xs = dd->xs / nc;
238     dd->xe = dd->xe / nc;
239     dd->Xs = dd->Xs / nc;
240     dd->Xe = dd->Xe / nc;
241   }
242 
243   /*
244      We do not provide a getcoloring function in the DMDA operations because
245    the basic DMDA does not know about matrices. We think of DMDA as being
246    more low-level then matrices.
247   */
248   if (dim == 1) PetscCall(DMCreateColoring_DA_1d_MPIAIJ(da, ctype, coloring));
249   else if (dim == 2) PetscCall(DMCreateColoring_DA_2d_MPIAIJ(da, ctype, coloring));
250   else if (dim == 3) PetscCall(DMCreateColoring_DA_3d_MPIAIJ(da, ctype, coloring));
251   else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not done for %" PetscInt_FMT " dimension, send us mail petsc-maint@mcs.anl.gov for code", dim);
252   if (isBAIJ) {
253     dd->w  = nc;
254     dd->xs = dd->xs * nc;
255     dd->xe = dd->xe * nc;
256     dd->Xs = dd->Xs * nc;
257     dd->Xe = dd->Xe * nc;
258   }
259   PetscFunctionReturn(PETSC_SUCCESS);
260 }
261 
262 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
263 {
264   PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, M, N, dim, s, k, nc, col;
265   PetscInt         ncolors = 0;
266   MPI_Comm         comm;
267   DMBoundaryType   bx, by;
268   DMDAStencilType  st;
269   ISColoringValue *colors;
270   DM_DA           *dd = (DM_DA *)da->data;
271 
272   PetscFunctionBegin;
273   /*
274          nc - number of components per grid point
275          col - number of colors needed in one direction for single component problem
276 
277   */
278   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
279   col = 2 * s + 1;
280   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
281   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
282   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
283 
284   /* special case as taught to us by Paul Hovland */
285   if (st == DMDA_STENCIL_STAR && s == 1) {
286     PetscCall(DMCreateColoring_DA_2d_5pt_MPIAIJ(da, ctype, coloring));
287   } else {
288     if (ctype == IS_COLORING_GLOBAL) {
289       if (!dd->localcoloring) {
290         PetscCall(PetscMalloc1(nc * nx * ny, &colors));
291         ii = 0;
292         for (j = ys; j < ys + ny; j++) {
293           for (i = xs; i < xs + nx; i++) {
294             for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((i % col) + col * (j % col));
295           }
296         }
297         ncolors = nc + nc * (col - 1 + col * (col - 1));
298         PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
299       }
300       *coloring = dd->localcoloring;
301     } else if (ctype == IS_COLORING_LOCAL) {
302       if (!dd->ghostedcoloring) {
303         PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
304         ii = 0;
305         for (j = gys; j < gys + gny; j++) {
306           for (i = gxs; i < gxs + gnx; i++) {
307             for (k = 0; k < nc; k++) {
308               /* the complicated stuff is to handle periodic boundaries */
309               colors[ii++] = k + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col));
310             }
311           }
312         }
313         ncolors = nc + nc * (col - 1 + col * (col - 1));
314         PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
315         /* PetscIntView(ncolors,(PetscInt*)colors,0); */
316 
317         PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
318       }
319       *coloring = dd->ghostedcoloring;
320     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
321   }
322   PetscCall(ISColoringReference(*coloring));
323   PetscFunctionReturn(PETSC_SUCCESS);
324 }
325 
326 PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
327 {
328   PetscInt         xs, ys, nx, ny, i, j, gxs, gys, gnx, gny, m, n, p, dim, s, k, nc, col, zs, gzs, ii, l, nz, gnz, M, N, P;
329   PetscInt         ncolors;
330   MPI_Comm         comm;
331   DMBoundaryType   bx, by, bz;
332   DMDAStencilType  st;
333   ISColoringValue *colors;
334   DM_DA           *dd = (DM_DA *)da->data;
335 
336   PetscFunctionBegin;
337   /*
338          nc - number of components per grid point
339          col - number of colors needed in one direction for single component problem
340   */
341   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
342   col = 2 * s + 1;
343   PetscCheck(bx != DM_BOUNDARY_PERIODIC || (m % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in X is divisible\n\
344                  by 2*stencil_width + 1\n");
345   PetscCheck(by != DM_BOUNDARY_PERIODIC || (n % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Y is divisible\n\
346                  by 2*stencil_width + 1\n");
347   PetscCheck(bz != DM_BOUNDARY_PERIODIC || (p % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Z is divisible\n\
348                  by 2*stencil_width + 1\n");
349 
350   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
351   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
352   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
353 
354   /* create the coloring */
355   if (ctype == IS_COLORING_GLOBAL) {
356     if (!dd->localcoloring) {
357       PetscCall(PetscMalloc1(nc * nx * ny * nz, &colors));
358       ii = 0;
359       for (k = zs; k < zs + nz; k++) {
360         for (j = ys; j < ys + ny; j++) {
361           for (i = xs; i < xs + nx; i++) {
362             for (l = 0; l < nc; l++) colors[ii++] = l + nc * ((i % col) + col * (j % col) + col * col * (k % col));
363           }
364         }
365       }
366       ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
367       PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny * nz, colors, PETSC_OWN_POINTER, &dd->localcoloring));
368     }
369     *coloring = dd->localcoloring;
370   } else if (ctype == IS_COLORING_LOCAL) {
371     if (!dd->ghostedcoloring) {
372       PetscCall(PetscMalloc1(nc * gnx * gny * gnz, &colors));
373       ii = 0;
374       for (k = gzs; k < gzs + gnz; k++) {
375         for (j = gys; j < gys + gny; j++) {
376           for (i = gxs; i < gxs + gnx; i++) {
377             for (l = 0; l < nc; l++) {
378               /* the complicated stuff is to handle periodic boundaries */
379               colors[ii++] = l + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col) + col * col * (SetInRange(k, p) % col));
380             }
381           }
382         }
383       }
384       ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
385       PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny * gnz, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
386       PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
387     }
388     *coloring = dd->ghostedcoloring;
389   } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
390   PetscCall(ISColoringReference(*coloring));
391   PetscFunctionReturn(PETSC_SUCCESS);
392 }
393 
394 PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
395 {
396   PetscInt         xs, nx, i, i1, gxs, gnx, l, m, M, dim, s, nc, col;
397   PetscInt         ncolors;
398   MPI_Comm         comm;
399   DMBoundaryType   bx;
400   ISColoringValue *colors;
401   DM_DA           *dd = (DM_DA *)da->data;
402 
403   PetscFunctionBegin;
404   /*
405          nc - number of components per grid point
406          col - number of colors needed in one direction for single component problem
407   */
408   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, &M, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
409   col = 2 * s + 1;
410   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
411   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
412   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
413 
414   /* create the coloring */
415   if (ctype == IS_COLORING_GLOBAL) {
416     if (!dd->localcoloring) {
417       PetscCall(PetscMalloc1(nc * nx, &colors));
418       if (dd->ofillcols) {
419         PetscInt tc = 0;
420         for (i = 0; i < nc; i++) tc += (PetscInt)(dd->ofillcols[i] > 0);
421         i1 = 0;
422         for (i = xs; i < xs + nx; i++) {
423           for (l = 0; l < nc; l++) {
424             if (dd->ofillcols[l] && (i % col)) {
425               colors[i1++] = nc - 1 + tc * ((i % col) - 1) + dd->ofillcols[l];
426             } else {
427               colors[i1++] = l;
428             }
429           }
430         }
431         ncolors = nc + 2 * s * tc;
432       } else {
433         i1 = 0;
434         for (i = xs; i < xs + nx; i++) {
435           for (l = 0; l < nc; l++) colors[i1++] = l + nc * (i % col);
436         }
437         ncolors = nc + nc * (col - 1);
438       }
439       PetscCall(ISColoringCreate(comm, ncolors, nc * nx, colors, PETSC_OWN_POINTER, &dd->localcoloring));
440     }
441     *coloring = dd->localcoloring;
442   } else if (ctype == IS_COLORING_LOCAL) {
443     if (!dd->ghostedcoloring) {
444       PetscCall(PetscMalloc1(nc * gnx, &colors));
445       i1 = 0;
446       for (i = gxs; i < gxs + gnx; i++) {
447         for (l = 0; l < nc; l++) {
448           /* the complicated stuff is to handle periodic boundaries */
449           colors[i1++] = l + nc * (SetInRange(i, m) % col);
450         }
451       }
452       ncolors = nc + nc * (col - 1);
453       PetscCall(ISColoringCreate(comm, ncolors, nc * gnx, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
454       PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
455     }
456     *coloring = dd->ghostedcoloring;
457   } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
458   PetscCall(ISColoringReference(*coloring));
459   PetscFunctionReturn(PETSC_SUCCESS);
460 }
461 
462 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
463 {
464   PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, dim, s, k, nc;
465   PetscInt         ncolors;
466   MPI_Comm         comm;
467   DMBoundaryType   bx, by;
468   ISColoringValue *colors;
469   DM_DA           *dd = (DM_DA *)da->data;
470 
471   PetscFunctionBegin;
472   /*
473          nc - number of components per grid point
474          col - number of colors needed in one direction for single component problem
475   */
476   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, NULL));
477   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
478   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
479   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
480   /* create the coloring */
481   if (ctype == IS_COLORING_GLOBAL) {
482     if (!dd->localcoloring) {
483       PetscCall(PetscMalloc1(nc * nx * ny, &colors));
484       ii = 0;
485       for (j = ys; j < ys + ny; j++) {
486         for (i = xs; i < xs + nx; i++) {
487           for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * j + i) % 5);
488         }
489       }
490       ncolors = 5 * nc;
491       PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
492     }
493     *coloring = dd->localcoloring;
494   } else if (ctype == IS_COLORING_LOCAL) {
495     if (!dd->ghostedcoloring) {
496       PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
497       ii = 0;
498       for (j = gys; j < gys + gny; j++) {
499         for (i = gxs; i < gxs + gnx; i++) {
500           for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * SetInRange(j, n) + SetInRange(i, m)) % 5);
501         }
502       }
503       ncolors = 5 * nc;
504       PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
505       PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
506     }
507     *coloring = dd->ghostedcoloring;
508   } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
509   PetscFunctionReturn(PETSC_SUCCESS);
510 }
511 
512 /* =========================================================================== */
513 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM, Mat);
514 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM, Mat);
515 extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM, Mat);
516 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM, Mat);
517 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM, Mat);
518 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM, Mat);
519 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM, Mat);
520 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM, Mat);
521 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM, Mat);
522 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM, Mat);
523 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM, Mat);
524 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM, Mat);
525 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM, Mat);
526 extern PetscErrorCode DMCreateMatrix_DA_IS(DM, Mat);
527 
528 static PetscErrorCode MatView_MPI_DA(Mat A, PetscViewer viewer)
529 {
530   DM                da;
531   const char       *prefix;
532   Mat               AA, Anatural;
533   AO                ao;
534   PetscInt          rstart, rend, *petsc, i;
535   IS                is;
536   MPI_Comm          comm;
537   PetscViewerFormat format;
538   PetscBool         flag;
539 
540   PetscFunctionBegin;
541   /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
542   PetscCall(PetscViewerGetFormat(viewer, &format));
543   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(PETSC_SUCCESS);
544 
545   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
546   PetscCall(MatGetDM(A, &da));
547   PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");
548 
549   PetscCall(DMDAGetAO(da, &ao));
550   PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
551   PetscCall(PetscMalloc1(rend - rstart, &petsc));
552   for (i = rstart; i < rend; i++) petsc[i - rstart] = i;
553   PetscCall(AOApplicationToPetsc(ao, rend - rstart, petsc));
554   PetscCall(ISCreateGeneral(comm, rend - rstart, petsc, PETSC_OWN_POINTER, &is));
555 
556   /* call viewer on natural ordering */
557   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPISELL, &flag));
558   if (flag) {
559     PetscCall(MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &AA));
560     A = AA;
561   }
562   PetscCall(MatCreateSubMatrix(A, is, is, MAT_INITIAL_MATRIX, &Anatural));
563   PetscCall(ISDestroy(&is));
564   PetscCall(PetscObjectGetOptionsPrefix((PetscObject)A, &prefix));
565   PetscCall(PetscObjectSetOptionsPrefix((PetscObject)Anatural, prefix));
566   PetscCall(PetscObjectSetName((PetscObject)Anatural, ((PetscObject)A)->name));
567   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
568   PetscCall(MatView(Anatural, viewer));
569   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
570   PetscCall(MatDestroy(&Anatural));
571   if (flag) PetscCall(MatDestroy(&AA));
572   PetscFunctionReturn(PETSC_SUCCESS);
573 }
574 
575 static PetscErrorCode MatLoad_MPI_DA(Mat A, PetscViewer viewer)
576 {
577   DM       da;
578   Mat      Anatural, Aapp;
579   AO       ao;
580   PetscInt rstart, rend, *app, i, m, n, M, N;
581   IS       is;
582   MPI_Comm comm;
583 
584   PetscFunctionBegin;
585   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
586   PetscCall(MatGetDM(A, &da));
587   PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");
588 
589   /* Load the matrix in natural ordering */
590   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Anatural));
591   PetscCall(MatSetType(Anatural, ((PetscObject)A)->type_name));
592   PetscCall(MatGetSize(A, &M, &N));
593   PetscCall(MatGetLocalSize(A, &m, &n));
594   PetscCall(MatSetSizes(Anatural, m, n, M, N));
595   PetscCall(MatLoad(Anatural, viewer));
596 
597   /* Map natural ordering to application ordering and create IS */
598   PetscCall(DMDAGetAO(da, &ao));
599   PetscCall(MatGetOwnershipRange(Anatural, &rstart, &rend));
600   PetscCall(PetscMalloc1(rend - rstart, &app));
601   for (i = rstart; i < rend; i++) app[i - rstart] = i;
602   PetscCall(AOPetscToApplication(ao, rend - rstart, app));
603   PetscCall(ISCreateGeneral(comm, rend - rstart, app, PETSC_OWN_POINTER, &is));
604 
605   /* Do permutation and replace header */
606   PetscCall(MatCreateSubMatrix(Anatural, is, is, MAT_INITIAL_MATRIX, &Aapp));
607   PetscCall(MatHeaderReplace(A, &Aapp));
608   PetscCall(ISDestroy(&is));
609   PetscCall(MatDestroy(&Anatural));
610   PetscFunctionReturn(PETSC_SUCCESS);
611 }
612 
613 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
614 {
615   PetscInt    dim, dof, nx, ny, nz, dims[3], starts[3], M, N, P;
616   Mat         A;
617   MPI_Comm    comm;
618   MatType     Atype;
619   MatType     mtype;
620   PetscMPIInt size;
621   DM_DA      *dd    = (DM_DA *)da->data;
622   void (*aij)(void) = NULL, (*baij)(void) = NULL, (*sbaij)(void) = NULL, (*sell)(void) = NULL, (*is)(void) = NULL;
623 
624   PetscFunctionBegin;
625   PetscCall(MatInitializePackage());
626   mtype = da->mattype;
627 
628   /*
629                                   m
630           ------------------------------------------------------
631          |                                                     |
632          |                                                     |
633          |               ----------------------                |
634          |               |                    |                |
635       n  |           ny  |                    |                |
636          |               |                    |                |
637          |               .---------------------                |
638          |             (xs,ys)     nx                          |
639          |            .                                        |
640          |         (gxs,gys)                                   |
641          |                                                     |
642           -----------------------------------------------------
643   */
644 
645   /*
646          nc - number of components per grid point
647          col - number of colors needed in one direction for single component problem
648   */
649   M   = dd->M;
650   N   = dd->N;
651   P   = dd->P;
652   dim = da->dim;
653   dof = dd->w;
654   /* PetscCall(DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); */
655   PetscCall(DMDAGetCorners(da, NULL, NULL, NULL, &nx, &ny, &nz));
656   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
657   PetscCall(MatCreate(comm, &A));
658   PetscCall(MatSetSizes(A, dof * nx * ny * nz, dof * nx * ny * nz, dof * M * N * P, dof * M * N * P));
659   PetscCall(MatSetType(A, mtype));
660   PetscCall(MatSetFromOptions(A));
661   if (dof * nx * ny * nz < da->bind_below) {
662     PetscCall(MatSetBindingPropagates(A, PETSC_TRUE));
663     PetscCall(MatBindToCPU(A, PETSC_TRUE));
664   }
665   PetscCall(MatSetDM(A, da));
666   if (da->structure_only) PetscCall(MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE));
667   PetscCall(MatGetType(A, &Atype));
668   /*
669      We do not provide a getmatrix function in the DMDA operations because
670    the basic DMDA does not know about matrices. We think of DMDA as being more
671    more low-level than matrices. This is kind of cheating but, cause sometimes
672    we think of DMDA has higher level than matrices.
673 
674      We could switch based on Atype (or mtype), but we do not since the
675    specialized setting routines depend only on the particular preallocation
676    details of the matrix, not the type itself.
677   */
678   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJSetPreallocation_C", &aij));
679   if (!aij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJSetPreallocation_C", &aij));
680   if (!aij) {
681     PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIBAIJSetPreallocation_C", &baij));
682     if (!baij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", &baij));
683     if (!baij) {
684       PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISBAIJSetPreallocation_C", &sbaij));
685       if (!sbaij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSBAIJSetPreallocation_C", &sbaij));
686       if (!sbaij) {
687         PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", &sell));
688         if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSELLSetPreallocation_C", &sell));
689       }
690       if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatISSetPreallocation_C", &is));
691     }
692   }
693   if (aij) {
694     if (dim == 1) {
695       if (dd->ofill) {
696         PetscCall(DMCreateMatrix_DA_1d_MPIAIJ_Fill(da, A));
697       } else {
698         DMBoundaryType bx;
699         PetscMPIInt    size;
700         PetscCall(DMDAGetInfo(da, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &bx, NULL, NULL, NULL));
701         PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));
702         if (size == 1 && bx == DM_BOUNDARY_NONE) {
703           PetscCall(DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da, A));
704         } else {
705           PetscCall(DMCreateMatrix_DA_1d_MPIAIJ(da, A));
706         }
707       }
708     } else if (dim == 2) {
709       if (dd->ofill) {
710         PetscCall(DMCreateMatrix_DA_2d_MPIAIJ_Fill(da, A));
711       } else {
712         PetscCall(DMCreateMatrix_DA_2d_MPIAIJ(da, A));
713       }
714     } else if (dim == 3) {
715       if (dd->ofill) {
716         PetscCall(DMCreateMatrix_DA_3d_MPIAIJ_Fill(da, A));
717       } else {
718         PetscCall(DMCreateMatrix_DA_3d_MPIAIJ(da, A));
719       }
720     }
721   } else if (baij) {
722     if (dim == 2) {
723       PetscCall(DMCreateMatrix_DA_2d_MPIBAIJ(da, A));
724     } else if (dim == 3) {
725       PetscCall(DMCreateMatrix_DA_3d_MPIBAIJ(da, A));
726     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
727   } else if (sbaij) {
728     if (dim == 2) {
729       PetscCall(DMCreateMatrix_DA_2d_MPISBAIJ(da, A));
730     } else if (dim == 3) {
731       PetscCall(DMCreateMatrix_DA_3d_MPISBAIJ(da, A));
732     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
733   } else if (sell) {
734     if (dim == 2) {
735       PetscCall(DMCreateMatrix_DA_2d_MPISELL(da, A));
736     } else if (dim == 3) {
737       PetscCall(DMCreateMatrix_DA_3d_MPISELL(da, A));
738     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
739   } else if (is) {
740     PetscCall(DMCreateMatrix_DA_IS(da, A));
741   } else {
742     ISLocalToGlobalMapping ltog;
743 
744     PetscCall(MatSetBlockSize(A, dof));
745     PetscCall(MatSetUp(A));
746     PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
747     PetscCall(MatSetLocalToGlobalMapping(A, ltog, ltog));
748   }
749   PetscCall(DMDAGetGhostCorners(da, &starts[0], &starts[1], &starts[2], &dims[0], &dims[1], &dims[2]));
750   PetscCall(MatSetStencil(A, dim, dims, starts, dof));
751   PetscCall(MatSetDM(A, da));
752   PetscCallMPI(MPI_Comm_size(comm, &size));
753   if (size > 1) {
754     /* change viewer to display matrix in natural ordering */
755     PetscCall(MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA));
756     PetscCall(MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA));
757   }
758   *J = A;
759   PetscFunctionReturn(PETSC_SUCCESS);
760 }
761 
762 PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]);
763 
764 PetscErrorCode DMCreateMatrix_DA_IS(DM dm, Mat J)
765 {
766   DM_DA                 *da = (DM_DA *)dm->data;
767   Mat                    lJ, P;
768   ISLocalToGlobalMapping ltog;
769   IS                     is;
770   PetscBT                bt;
771   const PetscInt        *e_loc, *idx;
772   PetscInt               i, nel, nen, nv, dof, *gidx, n, N;
773 
774   /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
775      We need to filter out the local indices that are not represented through the DMDAGetElements decomposition */
776   PetscFunctionBegin;
777   dof = da->w;
778   PetscCall(MatSetBlockSize(J, dof));
779   PetscCall(DMGetLocalToGlobalMapping(dm, &ltog));
780 
781   /* flag local elements indices in local DMDA numbering */
782   PetscCall(ISLocalToGlobalMappingGetSize(ltog, &nv));
783   PetscCall(PetscBTCreate(nv / dof, &bt));
784   PetscCall(DMDAGetElements(dm, &nel, &nen, &e_loc)); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
785   for (i = 0; i < nel * nen; i++) PetscCall(PetscBTSet(bt, e_loc[i]));
786 
787   /* filter out (set to -1) the global indices not used by the local elements */
788   PetscCall(PetscMalloc1(nv / dof, &gidx));
789   PetscCall(ISLocalToGlobalMappingGetBlockIndices(ltog, &idx));
790   PetscCall(PetscArraycpy(gidx, idx, nv / dof));
791   PetscCall(ISLocalToGlobalMappingRestoreBlockIndices(ltog, &idx));
792   for (i = 0; i < nv / dof; i++)
793     if (!PetscBTLookup(bt, i)) gidx[i] = -1;
794   PetscCall(PetscBTDestroy(&bt));
795   PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)dm), dof, nv / dof, gidx, PETSC_OWN_POINTER, &is));
796   PetscCall(ISLocalToGlobalMappingCreateIS(is, &ltog));
797   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
798   PetscCall(ISLocalToGlobalMappingDestroy(&ltog));
799   PetscCall(ISDestroy(&is));
800 
801   /* Preallocation */
802   if (dm->prealloc_skip) {
803     PetscCall(MatSetUp(J));
804   } else {
805     PetscCall(MatISGetLocalMat(J, &lJ));
806     PetscCall(MatGetLocalToGlobalMapping(lJ, &ltog, NULL));
807     PetscCall(MatCreate(PetscObjectComm((PetscObject)lJ), &P));
808     PetscCall(MatSetType(P, MATPREALLOCATOR));
809     PetscCall(MatSetLocalToGlobalMapping(P, ltog, ltog));
810     PetscCall(MatGetSize(lJ, &N, NULL));
811     PetscCall(MatGetLocalSize(lJ, &n, NULL));
812     PetscCall(MatSetSizes(P, n, n, N, N));
813     PetscCall(MatSetBlockSize(P, dof));
814     PetscCall(MatSetUp(P));
815     for (i = 0; i < nel; i++) PetscCall(MatSetValuesBlockedLocal(P, nen, e_loc + i * nen, nen, e_loc + i * nen, NULL, INSERT_VALUES));
816     PetscCall(MatPreallocatorPreallocate(P, (PetscBool)!da->prealloc_only, lJ));
817     PetscCall(MatISRestoreLocalMat(J, &lJ));
818     PetscCall(DMDARestoreElements(dm, &nel, &nen, &e_loc));
819     PetscCall(MatDestroy(&P));
820 
821     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
822     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
823   }
824   PetscFunctionReturn(PETSC_SUCCESS);
825 }
826 
827 PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da, Mat J)
828 {
829   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p;
830   PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
831   MPI_Comm               comm;
832   PetscScalar           *values;
833   DMBoundaryType         bx, by;
834   ISLocalToGlobalMapping ltog;
835   DMDAStencilType        st;
836 
837   PetscFunctionBegin;
838   /*
839          nc - number of components per grid point
840          col - number of colors needed in one direction for single component problem
841   */
842   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
843   col = 2 * s + 1;
844   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
845   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
846   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
847 
848   PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
849   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
850 
851   PetscCall(MatSetBlockSize(J, nc));
852   /* determine the matrix preallocation information */
853   MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
854   for (i = xs; i < xs + nx; i++) {
855     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
856     pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
857 
858     for (j = ys; j < ys + ny; j++) {
859       slot = i - gxs + gnx * (j - gys);
860 
861       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
862       lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
863 
864       cnt = 0;
865       for (k = 0; k < nc; k++) {
866         for (l = lstart; l < lend + 1; l++) {
867           for (p = pstart; p < pend + 1; p++) {
868             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
869               cols[cnt++] = k + nc * (slot + gnx * l + p);
870             }
871           }
872         }
873         rows[k] = k + nc * (slot);
874       }
875       PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
876     }
877   }
878   PetscCall(MatSetBlockSize(J, nc));
879   PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
880   PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
881   MatPreallocateEnd(dnz, onz);
882 
883   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
884 
885   /*
886     For each node in the grid: we get the neighbors in the local (on processor ordering
887     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
888     PETSc ordering.
889   */
890   if (!da->prealloc_only) {
891     PetscCall(PetscCalloc1(col * col * nc * nc, &values));
892     for (i = xs; i < xs + nx; i++) {
893       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
894       pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
895 
896       for (j = ys; j < ys + ny; j++) {
897         slot = i - gxs + gnx * (j - gys);
898 
899         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
900         lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
901 
902         cnt = 0;
903         for (k = 0; k < nc; k++) {
904           for (l = lstart; l < lend + 1; l++) {
905             for (p = pstart; p < pend + 1; p++) {
906               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
907                 cols[cnt++] = k + nc * (slot + gnx * l + p);
908               }
909             }
910           }
911           rows[k] = k + nc * (slot);
912         }
913         PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
914       }
915     }
916     PetscCall(PetscFree(values));
917     /* do not copy values to GPU since they are all zero and not yet needed there */
918     PetscCall(MatBindToCPU(J, PETSC_TRUE));
919     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
920     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
921     PetscCall(MatBindToCPU(J, PETSC_FALSE));
922     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
923   }
924   PetscCall(PetscFree2(rows, cols));
925   PetscFunctionReturn(PETSC_SUCCESS);
926 }
927 
928 PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da, Mat J)
929 {
930   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
931   PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
932   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
933   MPI_Comm               comm;
934   PetscScalar           *values;
935   DMBoundaryType         bx, by, bz;
936   ISLocalToGlobalMapping ltog;
937   DMDAStencilType        st;
938 
939   PetscFunctionBegin;
940   /*
941          nc - number of components per grid point
942          col - number of colors needed in one direction for single component problem
943   */
944   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
945   col = 2 * s + 1;
946   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
947   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
948   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
949 
950   PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
951   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
952 
953   PetscCall(MatSetBlockSize(J, nc));
954   /* determine the matrix preallocation information */
955   MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
956   for (i = xs; i < xs + nx; i++) {
957     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
958     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
959     for (j = ys; j < ys + ny; j++) {
960       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
961       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
962       for (k = zs; k < zs + nz; k++) {
963         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
964         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
965 
966         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
967 
968         cnt = 0;
969         for (l = 0; l < nc; l++) {
970           for (ii = istart; ii < iend + 1; ii++) {
971             for (jj = jstart; jj < jend + 1; jj++) {
972               for (kk = kstart; kk < kend + 1; kk++) {
973                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
974                   cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
975                 }
976               }
977             }
978           }
979           rows[l] = l + nc * (slot);
980         }
981         PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
982       }
983     }
984   }
985   PetscCall(MatSetBlockSize(J, nc));
986   PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
987   PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
988   MatPreallocateEnd(dnz, onz);
989   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
990 
991   /*
992     For each node in the grid: we get the neighbors in the local (on processor ordering
993     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
994     PETSc ordering.
995   */
996   if (!da->prealloc_only) {
997     PetscCall(PetscCalloc1(col * col * col * nc * nc * nc, &values));
998     for (i = xs; i < xs + nx; i++) {
999       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1000       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1001       for (j = ys; j < ys + ny; j++) {
1002         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1003         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1004         for (k = zs; k < zs + nz; k++) {
1005           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1006           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1007 
1008           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1009 
1010           cnt = 0;
1011           for (l = 0; l < nc; l++) {
1012             for (ii = istart; ii < iend + 1; ii++) {
1013               for (jj = jstart; jj < jend + 1; jj++) {
1014                 for (kk = kstart; kk < kend + 1; kk++) {
1015                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1016                     cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
1017                   }
1018                 }
1019               }
1020             }
1021             rows[l] = l + nc * (slot);
1022           }
1023           PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
1024         }
1025       }
1026     }
1027     PetscCall(PetscFree(values));
1028     /* do not copy values to GPU since they are all zero and not yet needed there */
1029     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1030     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1031     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1032     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1033     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1034   }
1035   PetscCall(PetscFree2(rows, cols));
1036   PetscFunctionReturn(PETSC_SUCCESS);
1037 }
1038 
1039 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da, Mat J)
1040 {
1041   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, M, N;
1042   PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
1043   MPI_Comm               comm;
1044   DMBoundaryType         bx, by;
1045   ISLocalToGlobalMapping ltog, mltog;
1046   DMDAStencilType        st;
1047   PetscBool              removedups = PETSC_FALSE, alreadyboundtocpu = PETSC_TRUE;
1048 
1049   PetscFunctionBegin;
1050   /*
1051          nc - number of components per grid point
1052          col - number of colors needed in one direction for single component problem
1053   */
1054   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
1055   if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1056   col = 2 * s + 1;
1057   /*
1058        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1059        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1060   */
1061   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1062   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1063   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1064   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1065   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1066 
1067   PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
1068   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1069 
1070   PetscCall(MatSetBlockSize(J, nc));
1071   /* determine the matrix preallocation information */
1072   MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
1073   for (i = xs; i < xs + nx; i++) {
1074     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1075     pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1076 
1077     for (j = ys; j < ys + ny; j++) {
1078       slot = i - gxs + gnx * (j - gys);
1079 
1080       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1081       lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1082 
1083       cnt = 0;
1084       for (k = 0; k < nc; k++) {
1085         for (l = lstart; l < lend + 1; l++) {
1086           for (p = pstart; p < pend + 1; p++) {
1087             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
1088               cols[cnt++] = k + nc * (slot + gnx * l + p);
1089             }
1090           }
1091         }
1092         rows[k] = k + nc * (slot);
1093       }
1094       if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1095       else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1096     }
1097   }
1098   PetscCall(MatSetBlockSize(J, nc));
1099   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
1100   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
1101   MatPreallocateEnd(dnz, onz);
1102   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1103   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1104 
1105   /*
1106     For each node in the grid: we get the neighbors in the local (on processor ordering
1107     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1108     PETSc ordering.
1109   */
1110   if (!da->prealloc_only) {
1111     for (i = xs; i < xs + nx; i++) {
1112       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1113       pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1114 
1115       for (j = ys; j < ys + ny; j++) {
1116         slot = i - gxs + gnx * (j - gys);
1117 
1118         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1119         lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1120 
1121         cnt = 0;
1122         for (l = lstart; l < lend + 1; l++) {
1123           for (p = pstart; p < pend + 1; p++) {
1124             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
1125               cols[cnt++] = nc * (slot + gnx * l + p);
1126               for (k = 1; k < nc; k++) {
1127                 cols[cnt] = 1 + cols[cnt - 1];
1128                 cnt++;
1129               }
1130             }
1131           }
1132         }
1133         for (k = 0; k < nc; k++) rows[k] = k + nc * (slot);
1134         PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1135       }
1136     }
1137     /* do not copy values to GPU since they are all zero and not yet needed there */
1138     PetscCall(MatBoundToCPU(J, &alreadyboundtocpu));
1139     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1140     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1141     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1142     if (!alreadyboundtocpu) PetscCall(MatBindToCPU(J, PETSC_FALSE));
1143     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1144     if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1145   }
1146   PetscCall(PetscFree2(rows, cols));
1147   PetscFunctionReturn(PETSC_SUCCESS);
1148 }
1149 
1150 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da, Mat J)
1151 {
1152   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1153   PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, M, N;
1154   PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
1155   DM_DA                 *dd = (DM_DA *)da->data;
1156   PetscInt               ifill_col, *ofill = dd->ofill, *dfill = dd->dfill;
1157   MPI_Comm               comm;
1158   DMBoundaryType         bx, by;
1159   ISLocalToGlobalMapping ltog;
1160   DMDAStencilType        st;
1161   PetscBool              removedups = PETSC_FALSE;
1162 
1163   PetscFunctionBegin;
1164   /*
1165          nc - number of components per grid point
1166          col - number of colors needed in one direction for single component problem
1167   */
1168   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
1169   col = 2 * s + 1;
1170   /*
1171        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1172        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1173   */
1174   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1175   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1176   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1177   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1178   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1179 
1180   PetscCall(PetscMalloc1(col * col * nc, &cols));
1181   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1182 
1183   PetscCall(MatSetBlockSize(J, nc));
1184   /* determine the matrix preallocation information */
1185   MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
1186   for (i = xs; i < xs + nx; i++) {
1187     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1188     pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1189 
1190     for (j = ys; j < ys + ny; j++) {
1191       slot = i - gxs + gnx * (j - gys);
1192 
1193       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1194       lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1195 
1196       for (k = 0; k < nc; k++) {
1197         cnt = 0;
1198         for (l = lstart; l < lend + 1; l++) {
1199           for (p = pstart; p < pend + 1; p++) {
1200             if (l || p) {
1201               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
1202                 for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
1203               }
1204             } else {
1205               if (dfill) {
1206                 for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
1207               } else {
1208                 for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
1209               }
1210             }
1211           }
1212         }
1213         row    = k + nc * (slot);
1214         maxcnt = PetscMax(maxcnt, cnt);
1215         if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
1216         else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
1217       }
1218     }
1219   }
1220   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
1221   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
1222   MatPreallocateEnd(dnz, onz);
1223   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1224 
1225   /*
1226     For each node in the grid: we get the neighbors in the local (on processor ordering
1227     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1228     PETSc ordering.
1229   */
1230   if (!da->prealloc_only) {
1231     for (i = xs; i < xs + nx; i++) {
1232       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1233       pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1234 
1235       for (j = ys; j < ys + ny; j++) {
1236         slot = i - gxs + gnx * (j - gys);
1237 
1238         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1239         lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1240 
1241         for (k = 0; k < nc; k++) {
1242           cnt = 0;
1243           for (l = lstart; l < lend + 1; l++) {
1244             for (p = pstart; p < pend + 1; p++) {
1245               if (l || p) {
1246                 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
1247                   for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
1248                 }
1249               } else {
1250                 if (dfill) {
1251                   for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
1252                 } else {
1253                   for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
1254                 }
1255               }
1256             }
1257           }
1258           row = k + nc * (slot);
1259           PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1260         }
1261       }
1262     }
1263     /* do not copy values to GPU since they are all zero and not yet needed there */
1264     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1265     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1266     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1267     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1268     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1269   }
1270   PetscCall(PetscFree(cols));
1271   PetscFunctionReturn(PETSC_SUCCESS);
1272 }
1273 
1274 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da, Mat J)
1275 {
1276   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1277   PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
1278   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
1279   MPI_Comm               comm;
1280   DMBoundaryType         bx, by, bz;
1281   ISLocalToGlobalMapping ltog, mltog;
1282   DMDAStencilType        st;
1283   PetscBool              removedups = PETSC_FALSE;
1284 
1285   PetscFunctionBegin;
1286   /*
1287          nc - number of components per grid point
1288          col - number of colors needed in one direction for single component problem
1289   */
1290   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
1291   if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1292   col = 2 * s + 1;
1293 
1294   /*
1295        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1296        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1297   */
1298   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1299   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1300   if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;
1301 
1302   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
1303   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
1304   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1305 
1306   PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
1307   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1308 
1309   PetscCall(MatSetBlockSize(J, nc));
1310   /* determine the matrix preallocation information */
1311   MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
1312   for (i = xs; i < xs + nx; i++) {
1313     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1314     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1315     for (j = ys; j < ys + ny; j++) {
1316       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1317       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1318       for (k = zs; k < zs + nz; k++) {
1319         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1320         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1321 
1322         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1323 
1324         cnt = 0;
1325         for (l = 0; l < nc; l++) {
1326           for (ii = istart; ii < iend + 1; ii++) {
1327             for (jj = jstart; jj < jend + 1; jj++) {
1328               for (kk = kstart; kk < kend + 1; kk++) {
1329                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1330                   cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
1331                 }
1332               }
1333             }
1334           }
1335           rows[l] = l + nc * (slot);
1336         }
1337         if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1338         else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1339       }
1340     }
1341   }
1342   PetscCall(MatSetBlockSize(J, nc));
1343   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
1344   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
1345   MatPreallocateEnd(dnz, onz);
1346   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1347   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1348 
1349   /*
1350     For each node in the grid: we get the neighbors in the local (on processor ordering
1351     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1352     PETSc ordering.
1353   */
1354   if (!da->prealloc_only) {
1355     for (i = xs; i < xs + nx; i++) {
1356       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1357       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1358       for (j = ys; j < ys + ny; j++) {
1359         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1360         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1361         for (k = zs; k < zs + nz; k++) {
1362           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1363           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1364 
1365           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1366 
1367           cnt = 0;
1368           for (kk = kstart; kk < kend + 1; kk++) {
1369             for (jj = jstart; jj < jend + 1; jj++) {
1370               for (ii = istart; ii < iend + 1; ii++) {
1371                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1372                   cols[cnt++] = nc * (slot + ii + gnx * jj + gnx * gny * kk);
1373                   for (l = 1; l < nc; l++) {
1374                     cols[cnt] = 1 + cols[cnt - 1];
1375                     cnt++;
1376                   }
1377                 }
1378               }
1379             }
1380           }
1381           rows[0] = nc * (slot);
1382           for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1383           PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1384         }
1385       }
1386     }
1387     /* do not copy values to GPU since they are all zero and not yet needed there */
1388     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1389     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1390     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1391     if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1392     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1393     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1394   }
1395   PetscCall(PetscFree2(rows, cols));
1396   PetscFunctionReturn(PETSC_SUCCESS);
1397 }
1398 
1399 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da, Mat J)
1400 {
1401   DM_DA                 *dd = (DM_DA *)da->data;
1402   PetscInt               xs, nx, i, j, gxs, gnx, row, k, l;
1403   PetscInt               m, dim, s, *cols = NULL, nc, cnt, maxcnt = 0, *ocols;
1404   PetscInt              *ofill = dd->ofill, *dfill = dd->dfill;
1405   DMBoundaryType         bx;
1406   ISLocalToGlobalMapping ltog;
1407   PetscMPIInt            rank, size;
1408 
1409   PetscFunctionBegin;
1410   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)da), &rank));
1411   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));
1412 
1413   /*
1414          nc - number of components per grid point
1415   */
1416   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1417   PetscCheck(s <= 1, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Matrix creation for 1d not implemented correctly for stencil width larger than 1");
1418   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1419   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1420 
1421   PetscCall(MatSetBlockSize(J, nc));
1422   PetscCall(PetscCalloc2(nx * nc, &cols, nx * nc, &ocols));
1423 
1424   /*
1425         note should be smaller for first and last process with no periodic
1426         does not handle dfill
1427   */
1428   cnt = 0;
1429   /* coupling with process to the left */
1430   for (i = 0; i < s; i++) {
1431     for (j = 0; j < nc; j++) {
1432       ocols[cnt] = ((rank == 0) ? 0 : (s - i) * (ofill[j + 1] - ofill[j]));
1433       cols[cnt]  = dfill[j + 1] - dfill[j] + (s + i) * (ofill[j + 1] - ofill[j]);
1434       if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1435         if (size > 1) ocols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
1436         else cols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
1437       }
1438       maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1439       cnt++;
1440     }
1441   }
1442   for (i = s; i < nx - s; i++) {
1443     for (j = 0; j < nc; j++) {
1444       cols[cnt] = dfill[j + 1] - dfill[j] + 2 * s * (ofill[j + 1] - ofill[j]);
1445       maxcnt    = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1446       cnt++;
1447     }
1448   }
1449   /* coupling with process to the right */
1450   for (i = nx - s; i < nx; i++) {
1451     for (j = 0; j < nc; j++) {
1452       ocols[cnt] = ((rank == (size - 1)) ? 0 : (i - nx + s + 1) * (ofill[j + 1] - ofill[j]));
1453       cols[cnt]  = dfill[j + 1] - dfill[j] + (s + nx - i - 1) * (ofill[j + 1] - ofill[j]);
1454       if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1455         if (size > 1) ocols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
1456         else cols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
1457       }
1458       maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1459       cnt++;
1460     }
1461   }
1462 
1463   PetscCall(MatSeqAIJSetPreallocation(J, 0, cols));
1464   PetscCall(MatMPIAIJSetPreallocation(J, 0, cols, 0, ocols));
1465   PetscCall(PetscFree2(cols, ocols));
1466 
1467   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1468   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1469 
1470   /*
1471     For each node in the grid: we get the neighbors in the local (on processor ordering
1472     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1473     PETSc ordering.
1474   */
1475   if (!da->prealloc_only) {
1476     PetscCall(PetscMalloc1(maxcnt, &cols));
1477     row = xs * nc;
1478     /* coupling with process to the left */
1479     for (i = xs; i < xs + s; i++) {
1480       for (j = 0; j < nc; j++) {
1481         cnt = 0;
1482         if (rank) {
1483           for (l = 0; l < s; l++) {
1484             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1485           }
1486         }
1487         if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1488           for (l = 0; l < s; l++) {
1489             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (m + i - s - l) * nc + ofill[k];
1490           }
1491         }
1492         if (dfill) {
1493           for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1494         } else {
1495           for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1496         }
1497         for (l = 0; l < s; l++) {
1498           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1499         }
1500         PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1501         row++;
1502       }
1503     }
1504     for (i = xs + s; i < xs + nx - s; i++) {
1505       for (j = 0; j < nc; j++) {
1506         cnt = 0;
1507         for (l = 0; l < s; l++) {
1508           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1509         }
1510         if (dfill) {
1511           for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1512         } else {
1513           for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1514         }
1515         for (l = 0; l < s; l++) {
1516           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1517         }
1518         PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1519         row++;
1520       }
1521     }
1522     /* coupling with process to the right */
1523     for (i = xs + nx - s; i < xs + nx; i++) {
1524       for (j = 0; j < nc; j++) {
1525         cnt = 0;
1526         for (l = 0; l < s; l++) {
1527           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1528         }
1529         if (dfill) {
1530           for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1531         } else {
1532           for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1533         }
1534         if (rank < size - 1) {
1535           for (l = 0; l < s; l++) {
1536             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1537           }
1538         }
1539         if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1540           for (l = 0; l < s; l++) {
1541             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s - l - m + 2) * nc + ofill[k];
1542           }
1543         }
1544         PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1545         row++;
1546       }
1547     }
1548     PetscCall(PetscFree(cols));
1549     /* do not copy values to GPU since they are all zero and not yet needed there */
1550     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1551     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1552     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1553     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1554     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1555   }
1556   PetscFunctionReturn(PETSC_SUCCESS);
1557 }
1558 
1559 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da, Mat J)
1560 {
1561   PetscInt               xs, nx, i, i1, slot, gxs, gnx;
1562   PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1563   PetscInt               istart, iend;
1564   DMBoundaryType         bx;
1565   ISLocalToGlobalMapping ltog, mltog;
1566 
1567   PetscFunctionBegin;
1568   /*
1569          nc - number of components per grid point
1570          col - number of colors needed in one direction for single component problem
1571   */
1572   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1573   if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1574   col = 2 * s + 1;
1575 
1576   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1577   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1578 
1579   PetscCall(MatSetBlockSize(J, nc));
1580   PetscCall(MatSeqAIJSetPreallocation(J, col * nc, NULL));
1581   PetscCall(MatMPIAIJSetPreallocation(J, col * nc, NULL, col * nc, NULL));
1582 
1583   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1584   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1585   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1586 
1587   /*
1588     For each node in the grid: we get the neighbors in the local (on processor ordering
1589     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1590     PETSc ordering.
1591   */
1592   if (!da->prealloc_only) {
1593     PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
1594     for (i = xs; i < xs + nx; i++) {
1595       istart = PetscMax(-s, gxs - i);
1596       iend   = PetscMin(s, gxs + gnx - i - 1);
1597       slot   = i - gxs;
1598 
1599       cnt = 0;
1600       for (i1 = istart; i1 < iend + 1; i1++) {
1601         cols[cnt++] = nc * (slot + i1);
1602         for (l = 1; l < nc; l++) {
1603           cols[cnt] = 1 + cols[cnt - 1];
1604           cnt++;
1605         }
1606       }
1607       rows[0] = nc * (slot);
1608       for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1609       PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1610     }
1611     /* do not copy values to GPU since they are all zero and not yet needed there */
1612     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1613     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1614     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1615     if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1616     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1617     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1618     PetscCall(PetscFree2(rows, cols));
1619   }
1620   PetscFunctionReturn(PETSC_SUCCESS);
1621 }
1622 
1623 PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da, Mat J)
1624 {
1625   PetscInt               xs, nx, i, i1, slot, gxs, gnx;
1626   PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1627   PetscInt               istart, iend;
1628   DMBoundaryType         bx;
1629   ISLocalToGlobalMapping ltog, mltog;
1630 
1631   PetscFunctionBegin;
1632   /*
1633          nc - number of components per grid point
1634          col - number of colors needed in one direction for single component problem
1635   */
1636   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1637   col = 2 * s + 1;
1638 
1639   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1640   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1641 
1642   PetscCall(MatSetBlockSize(J, nc));
1643   PetscCall(MatSeqAIJSetTotalPreallocation(J, nx * nc * col * nc));
1644 
1645   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1646   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1647   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1648 
1649   /*
1650     For each node in the grid: we get the neighbors in the local (on processor ordering
1651     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1652     PETSc ordering.
1653   */
1654   if (!da->prealloc_only) {
1655     PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
1656     for (i = xs; i < xs + nx; i++) {
1657       istart = PetscMax(-s, gxs - i);
1658       iend   = PetscMin(s, gxs + gnx - i - 1);
1659       slot   = i - gxs;
1660 
1661       cnt = 0;
1662       for (i1 = istart; i1 < iend + 1; i1++) {
1663         cols[cnt++] = nc * (slot + i1);
1664         for (l = 1; l < nc; l++) {
1665           cols[cnt] = 1 + cols[cnt - 1];
1666           cnt++;
1667         }
1668       }
1669       rows[0] = nc * (slot);
1670       for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1671       PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1672     }
1673     /* do not copy values to GPU since they are all zero and not yet needed there */
1674     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1675     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1676     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1677     if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1678     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1679     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1680     PetscCall(PetscFree2(rows, cols));
1681   }
1682   PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1683   PetscFunctionReturn(PETSC_SUCCESS);
1684 }
1685 
1686 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da, Mat J)
1687 {
1688   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1689   PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
1690   PetscInt               istart, iend, jstart, jend, ii, jj;
1691   MPI_Comm               comm;
1692   PetscScalar           *values;
1693   DMBoundaryType         bx, by;
1694   DMDAStencilType        st;
1695   ISLocalToGlobalMapping ltog;
1696 
1697   PetscFunctionBegin;
1698   /*
1699      nc - number of components per grid point
1700      col - number of colors needed in one direction for single component problem
1701   */
1702   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
1703   col = 2 * s + 1;
1704 
1705   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1706   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1707   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1708 
1709   PetscCall(PetscMalloc1(col * col * nc * nc, &cols));
1710 
1711   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1712 
1713   /* determine the matrix preallocation information */
1714   MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
1715   for (i = xs; i < xs + nx; i++) {
1716     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1717     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1718     for (j = ys; j < ys + ny; j++) {
1719       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1720       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1721       slot   = i - gxs + gnx * (j - gys);
1722 
1723       /* Find block columns in block row */
1724       cnt = 0;
1725       for (ii = istart; ii < iend + 1; ii++) {
1726         for (jj = jstart; jj < jend + 1; jj++) {
1727           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1728             cols[cnt++] = slot + ii + gnx * jj;
1729           }
1730         }
1731       }
1732       PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
1733     }
1734   }
1735   PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
1736   PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1737   MatPreallocateEnd(dnz, onz);
1738 
1739   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1740 
1741   /*
1742     For each node in the grid: we get the neighbors in the local (on processor ordering
1743     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1744     PETSc ordering.
1745   */
1746   if (!da->prealloc_only) {
1747     PetscCall(PetscCalloc1(col * col * nc * nc, &values));
1748     for (i = xs; i < xs + nx; i++) {
1749       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1750       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1751       for (j = ys; j < ys + ny; j++) {
1752         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1753         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1754         slot   = i - gxs + gnx * (j - gys);
1755         cnt    = 0;
1756         for (ii = istart; ii < iend + 1; ii++) {
1757           for (jj = jstart; jj < jend + 1; jj++) {
1758             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1759               cols[cnt++] = slot + ii + gnx * jj;
1760             }
1761           }
1762         }
1763         PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
1764       }
1765     }
1766     PetscCall(PetscFree(values));
1767     /* do not copy values to GPU since they are all zero and not yet needed there */
1768     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1769     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1770     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1771     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1772     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1773   }
1774   PetscCall(PetscFree(cols));
1775   PetscFunctionReturn(PETSC_SUCCESS);
1776 }
1777 
1778 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da, Mat J)
1779 {
1780   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1781   PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
1782   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
1783   MPI_Comm               comm;
1784   PetscScalar           *values;
1785   DMBoundaryType         bx, by, bz;
1786   DMDAStencilType        st;
1787   ISLocalToGlobalMapping ltog;
1788 
1789   PetscFunctionBegin;
1790   /*
1791          nc - number of components per grid point
1792          col - number of colors needed in one direction for single component problem
1793   */
1794   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
1795   col = 2 * s + 1;
1796 
1797   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
1798   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
1799   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1800 
1801   PetscCall(PetscMalloc1(col * col * col, &cols));
1802 
1803   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1804 
1805   /* determine the matrix preallocation information */
1806   MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
1807   for (i = xs; i < xs + nx; i++) {
1808     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1809     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1810     for (j = ys; j < ys + ny; j++) {
1811       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1812       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1813       for (k = zs; k < zs + nz; k++) {
1814         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1815         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1816 
1817         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1818 
1819         /* Find block columns in block row */
1820         cnt = 0;
1821         for (ii = istart; ii < iend + 1; ii++) {
1822           for (jj = jstart; jj < jend + 1; jj++) {
1823             for (kk = kstart; kk < kend + 1; kk++) {
1824               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1825                 cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
1826               }
1827             }
1828           }
1829         }
1830         PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
1831       }
1832     }
1833   }
1834   PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
1835   PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1836   MatPreallocateEnd(dnz, onz);
1837 
1838   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1839 
1840   /*
1841     For each node in the grid: we get the neighbors in the local (on processor ordering
1842     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1843     PETSc ordering.
1844   */
1845   if (!da->prealloc_only) {
1846     PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
1847     for (i = xs; i < xs + nx; i++) {
1848       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1849       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1850       for (j = ys; j < ys + ny; j++) {
1851         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1852         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1853         for (k = zs; k < zs + nz; k++) {
1854           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1855           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1856 
1857           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1858 
1859           cnt = 0;
1860           for (ii = istart; ii < iend + 1; ii++) {
1861             for (jj = jstart; jj < jend + 1; jj++) {
1862               for (kk = kstart; kk < kend + 1; kk++) {
1863                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1864                   cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
1865                 }
1866               }
1867             }
1868           }
1869           PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
1870         }
1871       }
1872     }
1873     PetscCall(PetscFree(values));
1874     /* do not copy values to GPU since they are all zero and not yet needed there */
1875     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1876     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1877     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1878     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1879     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1880   }
1881   PetscCall(PetscFree(cols));
1882   PetscFunctionReturn(PETSC_SUCCESS);
1883 }
1884 
1885 /*
1886   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
1887   identify in the local ordering with periodic domain.
1888 */
1889 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog, PetscInt *row, PetscInt *cnt, PetscInt col[])
1890 {
1891   PetscInt i, n;
1892 
1893   PetscFunctionBegin;
1894   PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, 1, row, row));
1895   PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, *cnt, col, col));
1896   for (i = 0, n = 0; i < *cnt; i++) {
1897     if (col[i] >= *row) col[n++] = col[i];
1898   }
1899   *cnt = n;
1900   PetscFunctionReturn(PETSC_SUCCESS);
1901 }
1902 
1903 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da, Mat J)
1904 {
1905   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1906   PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
1907   PetscInt               istart, iend, jstart, jend, ii, jj;
1908   MPI_Comm               comm;
1909   PetscScalar           *values;
1910   DMBoundaryType         bx, by;
1911   DMDAStencilType        st;
1912   ISLocalToGlobalMapping ltog;
1913 
1914   PetscFunctionBegin;
1915   /*
1916      nc - number of components per grid point
1917      col - number of colors needed in one direction for single component problem
1918   */
1919   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
1920   col = 2 * s + 1;
1921 
1922   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1923   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1924   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1925 
1926   PetscCall(PetscMalloc1(col * col * nc * nc, &cols));
1927 
1928   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1929 
1930   /* determine the matrix preallocation information */
1931   MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
1932   for (i = xs; i < xs + nx; i++) {
1933     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1934     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1935     for (j = ys; j < ys + ny; j++) {
1936       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1937       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1938       slot   = i - gxs + gnx * (j - gys);
1939 
1940       /* Find block columns in block row */
1941       cnt = 0;
1942       for (ii = istart; ii < iend + 1; ii++) {
1943         for (jj = jstart; jj < jend + 1; jj++) {
1944           if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
1945         }
1946       }
1947       PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
1948       PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
1949     }
1950   }
1951   PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
1952   PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1953   MatPreallocateEnd(dnz, onz);
1954 
1955   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1956 
1957   /*
1958     For each node in the grid: we get the neighbors in the local (on processor ordering
1959     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1960     PETSc ordering.
1961   */
1962   if (!da->prealloc_only) {
1963     PetscCall(PetscCalloc1(col * col * nc * nc, &values));
1964     for (i = xs; i < xs + nx; i++) {
1965       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1966       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1967       for (j = ys; j < ys + ny; j++) {
1968         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1969         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1970         slot   = i - gxs + gnx * (j - gys);
1971 
1972         /* Find block columns in block row */
1973         cnt = 0;
1974         for (ii = istart; ii < iend + 1; ii++) {
1975           for (jj = jstart; jj < jend + 1; jj++) {
1976             if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
1977           }
1978         }
1979         PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
1980         PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
1981       }
1982     }
1983     PetscCall(PetscFree(values));
1984     /* do not copy values to GPU since they are all zero and not yet needed there */
1985     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1986     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1987     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1988     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1989     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1990   }
1991   PetscCall(PetscFree(cols));
1992   PetscFunctionReturn(PETSC_SUCCESS);
1993 }
1994 
1995 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da, Mat J)
1996 {
1997   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1998   PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
1999   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
2000   MPI_Comm               comm;
2001   PetscScalar           *values;
2002   DMBoundaryType         bx, by, bz;
2003   DMDAStencilType        st;
2004   ISLocalToGlobalMapping ltog;
2005 
2006   PetscFunctionBegin;
2007   /*
2008      nc - number of components per grid point
2009      col - number of colors needed in one direction for single component problem
2010   */
2011   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
2012   col = 2 * s + 1;
2013 
2014   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
2015   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
2016   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
2017 
2018   /* create the matrix */
2019   PetscCall(PetscMalloc1(col * col * col, &cols));
2020 
2021   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
2022 
2023   /* determine the matrix preallocation information */
2024   MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
2025   for (i = xs; i < xs + nx; i++) {
2026     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2027     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2028     for (j = ys; j < ys + ny; j++) {
2029       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2030       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2031       for (k = zs; k < zs + nz; k++) {
2032         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2033         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2034 
2035         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2036 
2037         /* Find block columns in block row */
2038         cnt = 0;
2039         for (ii = istart; ii < iend + 1; ii++) {
2040           for (jj = jstart; jj < jend + 1; jj++) {
2041             for (kk = kstart; kk < kend + 1; kk++) {
2042               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
2043             }
2044           }
2045         }
2046         PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
2047         PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
2048       }
2049     }
2050   }
2051   PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
2052   PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
2053   MatPreallocateEnd(dnz, onz);
2054 
2055   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
2056 
2057   /*
2058     For each node in the grid: we get the neighbors in the local (on processor ordering
2059     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2060     PETSc ordering.
2061   */
2062   if (!da->prealloc_only) {
2063     PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
2064     for (i = xs; i < xs + nx; i++) {
2065       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2066       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2067       for (j = ys; j < ys + ny; j++) {
2068         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2069         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2070         for (k = zs; k < zs + nz; k++) {
2071           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2072           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2073 
2074           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2075 
2076           cnt = 0;
2077           for (ii = istart; ii < iend + 1; ii++) {
2078             for (jj = jstart; jj < jend + 1; jj++) {
2079               for (kk = kstart; kk < kend + 1; kk++) {
2080                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
2081               }
2082             }
2083           }
2084           PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
2085           PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
2086         }
2087       }
2088     }
2089     PetscCall(PetscFree(values));
2090     /* do not copy values to GPU since they are all zero and not yet needed there */
2091     PetscCall(MatBindToCPU(J, PETSC_TRUE));
2092     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
2093     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
2094     PetscCall(MatBindToCPU(J, PETSC_FALSE));
2095     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2096   }
2097   PetscCall(PetscFree(cols));
2098   PetscFunctionReturn(PETSC_SUCCESS);
2099 }
2100 
2101 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da, Mat J)
2102 {
2103   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
2104   PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, *dnz, *onz;
2105   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
2106   DM_DA                 *dd = (DM_DA *)da->data;
2107   PetscInt               ifill_col, *dfill = dd->dfill, *ofill = dd->ofill;
2108   MPI_Comm               comm;
2109   PetscScalar           *values;
2110   DMBoundaryType         bx, by, bz;
2111   ISLocalToGlobalMapping ltog;
2112   DMDAStencilType        st;
2113   PetscBool              removedups = PETSC_FALSE;
2114 
2115   PetscFunctionBegin;
2116   /*
2117          nc - number of components per grid point
2118          col - number of colors needed in one direction for single component problem
2119   */
2120   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
2121   col = 2 * s + 1;
2122 
2123   /*
2124        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
2125        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
2126   */
2127   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
2128   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
2129   if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;
2130 
2131   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
2132   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
2133   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
2134 
2135   PetscCall(PetscMalloc1(col * col * col * nc, &cols));
2136   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
2137 
2138   /* determine the matrix preallocation information */
2139   MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
2140 
2141   PetscCall(MatSetBlockSize(J, nc));
2142   for (i = xs; i < xs + nx; i++) {
2143     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2144     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2145     for (j = ys; j < ys + ny; j++) {
2146       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2147       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2148       for (k = zs; k < zs + nz; k++) {
2149         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2150         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2151 
2152         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2153 
2154         for (l = 0; l < nc; l++) {
2155           cnt = 0;
2156           for (ii = istart; ii < iend + 1; ii++) {
2157             for (jj = jstart; jj < jend + 1; jj++) {
2158               for (kk = kstart; kk < kend + 1; kk++) {
2159                 if (ii || jj || kk) {
2160                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
2161                     for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2162                   }
2163                 } else {
2164                   if (dfill) {
2165                     for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2166                   } else {
2167                     for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2168                   }
2169                 }
2170               }
2171             }
2172           }
2173           row    = l + nc * (slot);
2174           maxcnt = PetscMax(maxcnt, cnt);
2175           if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
2176           else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
2177         }
2178       }
2179     }
2180   }
2181   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
2182   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
2183   MatPreallocateEnd(dnz, onz);
2184   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
2185 
2186   /*
2187     For each node in the grid: we get the neighbors in the local (on processor ordering
2188     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2189     PETSc ordering.
2190   */
2191   if (!da->prealloc_only) {
2192     PetscCall(PetscCalloc1(maxcnt, &values));
2193     for (i = xs; i < xs + nx; i++) {
2194       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2195       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2196       for (j = ys; j < ys + ny; j++) {
2197         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2198         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2199         for (k = zs; k < zs + nz; k++) {
2200           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2201           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2202 
2203           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2204 
2205           for (l = 0; l < nc; l++) {
2206             cnt = 0;
2207             for (ii = istart; ii < iend + 1; ii++) {
2208               for (jj = jstart; jj < jend + 1; jj++) {
2209                 for (kk = kstart; kk < kend + 1; kk++) {
2210                   if (ii || jj || kk) {
2211                     if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
2212                       for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2213                     }
2214                   } else {
2215                     if (dfill) {
2216                       for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2217                     } else {
2218                       for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2219                     }
2220                   }
2221                 }
2222               }
2223             }
2224             row = l + nc * (slot);
2225             PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, values, INSERT_VALUES));
2226           }
2227         }
2228       }
2229     }
2230     PetscCall(PetscFree(values));
2231     /* do not copy values to GPU since they are all zero and not yet needed there */
2232     PetscCall(MatBindToCPU(J, PETSC_TRUE));
2233     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
2234     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
2235     PetscCall(MatBindToCPU(J, PETSC_FALSE));
2236     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2237   }
2238   PetscCall(PetscFree(cols));
2239   PetscFunctionReturn(PETSC_SUCCESS);
2240 }
2241