xref: /petsc/src/dm/impls/da/fdda.c (revision 6dd63270497ad23dcf16ae500a87ff2b2a0b7474)
1 #include <petsc/private/dmdaimpl.h> /*I      "petscdmda.h"     I*/
2 #include <petscmat.h>
3 #include <petscbt.h>
4 
5 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM, ISColoringType, ISColoring *);
6 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM, ISColoringType, ISColoring *);
7 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM, ISColoringType, ISColoring *);
8 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM, ISColoringType, ISColoring *);
9 
10 /*
11    For ghost i that may be negative or greater than the upper bound this
12   maps it into the 0:m-1 range using periodicity
13 */
14 #define SetInRange(i, m) ((i < 0) ? m + i : ((i >= m) ? i - m : i))
15 
16 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill, PetscInt w, PetscInt **rfill)
17 {
18   PetscInt i, j, nz, *fill;
19 
20   PetscFunctionBegin;
21   if (!dfill) PetscFunctionReturn(PETSC_SUCCESS);
22 
23   /* count number nonzeros */
24   nz = 0;
25   for (i = 0; i < w; i++) {
26     for (j = 0; j < w; j++) {
27       if (dfill[w * i + j]) nz++;
28     }
29   }
30   PetscCall(PetscMalloc1(nz + w + 1, &fill));
31   /* construct modified CSR storage of nonzero structure */
32   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
33    so fill[1] - fill[0] gives number of nonzeros in first row etc */
34   nz = w + 1;
35   for (i = 0; i < w; i++) {
36     fill[i] = nz;
37     for (j = 0; j < w; j++) {
38       if (dfill[w * i + j]) {
39         fill[nz] = j;
40         nz++;
41       }
42     }
43   }
44   fill[w] = nz;
45 
46   *rfill = fill;
47   PetscFunctionReturn(PETSC_SUCCESS);
48 }
49 
50 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse, PetscInt w, PetscInt **rfill)
51 {
52   PetscInt nz;
53 
54   PetscFunctionBegin;
55   if (!dfillsparse) PetscFunctionReturn(PETSC_SUCCESS);
56 
57   /* Determine number of non-zeros */
58   nz = (dfillsparse[w] - w - 1);
59 
60   /* Allocate space for our copy of the given sparse matrix representation. */
61   PetscCall(PetscMalloc1(nz + w + 1, rfill));
62   PetscCall(PetscArraycpy(*rfill, dfillsparse, nz + w + 1));
63   PetscFunctionReturn(PETSC_SUCCESS);
64 }
65 
66 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
67 {
68   PetscInt i, k, cnt = 1;
69 
70   PetscFunctionBegin;
71   /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of
72    columns to the left with any nonzeros in them plus 1 */
73   PetscCall(PetscCalloc1(dd->w, &dd->ofillcols));
74   for (i = 0; i < dd->w; i++) {
75     for (k = dd->ofill[i]; k < dd->ofill[i + 1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
76   }
77   for (i = 0; i < dd->w; i++) {
78     if (dd->ofillcols[i]) dd->ofillcols[i] = cnt++;
79   }
80   PetscFunctionReturn(PETSC_SUCCESS);
81 }
82 
83 /*@
84   DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
85   of the matrix returned by `DMCreateMatrix()`.
86 
87   Logically Collective
88 
89   Input Parameters:
90 + da    - the `DMDA`
91 . dfill - the fill pattern in the diagonal block (may be `NULL`, means use dense block)
92 - ofill - the fill pattern in the off-diagonal blocks
93 
94   Level: developer
95 
96   Notes:
97   This only makes sense when you are doing multicomponent problems but using the
98   `MATMPIAIJ` matrix format
99 
100   The format for `dfill` and `ofill` is a 2 dimensional dof by dof matrix with 1 entries
101   representing coupling and 0 entries for missing coupling. For example
102 .vb
103             dfill[9] = {1, 0, 0,
104                         1, 1, 0,
105                         0, 1, 1}
106 .ve
107   means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
108   itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
109   diagonal block).
110 
111   `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns then
112   can be represented in the `dfill`, `ofill` format
113 
114   Contributed by\:
115   Glenn Hammond
116 
117 .seealso: [](sec_struct), `DM`, `DMDA`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`, `DMDASetBlockFillsSparse()`
118 @*/
119 PetscErrorCode DMDASetBlockFills(DM da, const PetscInt *dfill, const PetscInt *ofill)
120 {
121   DM_DA *dd = (DM_DA *)da->data;
122 
123   PetscFunctionBegin;
124   /* save the given dfill and ofill information */
125   PetscCall(DMDASetBlockFills_Private(dfill, dd->w, &dd->dfill));
126   PetscCall(DMDASetBlockFills_Private(ofill, dd->w, &dd->ofill));
127 
128   /* count nonzeros in ofill columns */
129   PetscCall(DMDASetBlockFills_Private2(dd));
130   PetscFunctionReturn(PETSC_SUCCESS);
131 }
132 
133 /*@
134   DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
135   of the matrix returned by `DMCreateMatrix()`, using sparse representations
136   of fill patterns.
137 
138   Logically Collective
139 
140   Input Parameters:
141 + da          - the `DMDA`
142 . dfillsparse - the sparse fill pattern in the diagonal block (may be `NULL`, means use dense block)
143 - ofillsparse - the sparse fill pattern in the off-diagonal blocks
144 
145   Level: developer
146 
147   Notes:
148   This only makes sense when you are doing multicomponent problems but using the
149   `MATMPIAIJ` matrix format
150 
151   The format for `dfill` and `ofill` is a sparse representation of a
152   dof-by-dof matrix with 1 entries representing coupling and 0 entries
153   for missing coupling.  The sparse representation is a 1 dimensional
154   array of length nz + dof + 1, where nz is the number of non-zeros in
155   the matrix.  The first dof entries in the array give the
156   starting array indices of each row's items in the rest of the array,
157   the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
158   and the remaining nz items give the column indices of each of
159   the 1s within the logical 2D matrix.  Each row's items within
160   the array are the column indices of the 1s within that row
161   of the 2D matrix.  PETSc developers may recognize that this is the
162   same format as that computed by the `DMDASetBlockFills_Private()`
163   function from a dense 2D matrix representation.
164 
165   `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns then
166   can be represented in the `dfill`, `ofill` format
167 
168   Contributed by\:
169   Philip C. Roth
170 
171 .seealso: [](sec_struct), `DM`, `DMDA`, `DMDASetBlockFills()`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
172 @*/
173 PetscErrorCode DMDASetBlockFillsSparse(DM da, const PetscInt *dfillsparse, const PetscInt *ofillsparse)
174 {
175   DM_DA *dd = (DM_DA *)da->data;
176 
177   PetscFunctionBegin;
178   /* save the given dfill and ofill information */
179   PetscCall(DMDASetBlockFillsSparse_Private(dfillsparse, dd->w, &dd->dfill));
180   PetscCall(DMDASetBlockFillsSparse_Private(ofillsparse, dd->w, &dd->ofill));
181 
182   /* count nonzeros in ofill columns */
183   PetscCall(DMDASetBlockFills_Private2(dd));
184   PetscFunctionReturn(PETSC_SUCCESS);
185 }
186 
187 PetscErrorCode DMCreateColoring_DA(DM da, ISColoringType ctype, ISColoring *coloring)
188 {
189   PetscInt       dim, m, n, p, nc;
190   DMBoundaryType bx, by, bz;
191   MPI_Comm       comm;
192   PetscMPIInt    size;
193   PetscBool      isBAIJ;
194   DM_DA         *dd = (DM_DA *)da->data;
195 
196   PetscFunctionBegin;
197   /*
198                                   m
199           ------------------------------------------------------
200          |                                                     |
201          |                                                     |
202          |               ----------------------                |
203          |               |                    |                |
204       n  |           yn  |                    |                |
205          |               |                    |                |
206          |               .---------------------                |
207          |             (xs,ys)     xn                          |
208          |            .                                        |
209          |         (gxs,gys)                                   |
210          |                                                     |
211           -----------------------------------------------------
212   */
213 
214   /*
215          nc - number of components per grid point
216          col - number of colors needed in one direction for single component problem
217 
218   */
219   PetscCall(DMDAGetInfo(da, &dim, NULL, NULL, NULL, &m, &n, &p, &nc, NULL, &bx, &by, &bz, NULL));
220 
221   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
222   PetscCallMPI(MPI_Comm_size(comm, &size));
223   if (ctype == IS_COLORING_LOCAL) {
224     PetscCheck(!((m == 1 && bx == DM_BOUNDARY_PERIODIC) || (n == 1 && by == DM_BOUNDARY_PERIODIC) || (p == 1 && bz == DM_BOUNDARY_PERIODIC)), PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "IS_COLORING_LOCAL cannot be used for periodic boundary condition having both sides of the domain on the same process");
225   }
226 
227   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
228      matrices is for the blocks, not the individual matrix elements  */
229   PetscCall(PetscStrbeginswith(da->mattype, MATBAIJ, &isBAIJ));
230   if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATMPIBAIJ, &isBAIJ));
231   if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATSEQBAIJ, &isBAIJ));
232   if (isBAIJ) {
233     dd->w  = 1;
234     dd->xs = dd->xs / nc;
235     dd->xe = dd->xe / nc;
236     dd->Xs = dd->Xs / nc;
237     dd->Xe = dd->Xe / nc;
238   }
239 
240   /*
241      We do not provide a getcoloring function in the DMDA operations because
242    the basic DMDA does not know about matrices. We think of DMDA as being
243    more low-level then matrices.
244   */
245   if (dim == 1) PetscCall(DMCreateColoring_DA_1d_MPIAIJ(da, ctype, coloring));
246   else if (dim == 2) PetscCall(DMCreateColoring_DA_2d_MPIAIJ(da, ctype, coloring));
247   else if (dim == 3) PetscCall(DMCreateColoring_DA_3d_MPIAIJ(da, ctype, coloring));
248   else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not done for %" PetscInt_FMT " dimension, send us mail petsc-maint@mcs.anl.gov for code", dim);
249   if (isBAIJ) {
250     dd->w  = nc;
251     dd->xs = dd->xs * nc;
252     dd->xe = dd->xe * nc;
253     dd->Xs = dd->Xs * nc;
254     dd->Xe = dd->Xe * nc;
255   }
256   PetscFunctionReturn(PETSC_SUCCESS);
257 }
258 
259 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
260 {
261   PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, M, N, dim, s, k, nc, col;
262   PetscInt         ncolors = 0;
263   MPI_Comm         comm;
264   DMBoundaryType   bx, by;
265   DMDAStencilType  st;
266   ISColoringValue *colors;
267   DM_DA           *dd = (DM_DA *)da->data;
268 
269   PetscFunctionBegin;
270   /*
271          nc - number of components per grid point
272          col - number of colors needed in one direction for single component problem
273 
274   */
275   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
276   col = 2 * s + 1;
277   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
278   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
279   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
280 
281   /* special case as taught to us by Paul Hovland */
282   if (st == DMDA_STENCIL_STAR && s == 1) {
283     PetscCall(DMCreateColoring_DA_2d_5pt_MPIAIJ(da, ctype, coloring));
284   } else {
285     if (ctype == IS_COLORING_GLOBAL) {
286       if (!dd->localcoloring) {
287         PetscCall(PetscMalloc1(nc * nx * ny, &colors));
288         ii = 0;
289         for (j = ys; j < ys + ny; j++) {
290           for (i = xs; i < xs + nx; i++) {
291             for (k = 0; k < nc; k++) PetscCall(ISColoringValueCast(k + nc * ((i % col) + col * (j % col)), colors + ii++));
292           }
293         }
294         ncolors = nc + nc * (col - 1 + col * (col - 1));
295         PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
296       }
297       *coloring = dd->localcoloring;
298     } else if (ctype == IS_COLORING_LOCAL) {
299       if (!dd->ghostedcoloring) {
300         PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
301         ii = 0;
302         for (j = gys; j < gys + gny; j++) {
303           for (i = gxs; i < gxs + gnx; i++) {
304             for (k = 0; k < nc; k++) {
305               /* the complicated stuff is to handle periodic boundaries */
306               PetscCall(ISColoringValueCast(k + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col)), colors + ii++));
307             }
308           }
309         }
310         ncolors = nc + nc * (col - 1 + col * (col - 1));
311         PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
312         /* PetscIntView(ncolors,(PetscInt*)colors,0); */
313 
314         PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
315       }
316       *coloring = dd->ghostedcoloring;
317     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
318   }
319   PetscCall(ISColoringReference(*coloring));
320   PetscFunctionReturn(PETSC_SUCCESS);
321 }
322 
323 PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
324 {
325   PetscInt         xs, ys, nx, ny, i, j, gxs, gys, gnx, gny, m, n, p, dim, s, k, nc, col, zs, gzs, ii, l, nz, gnz, M, N, P;
326   PetscInt         ncolors;
327   MPI_Comm         comm;
328   DMBoundaryType   bx, by, bz;
329   DMDAStencilType  st;
330   ISColoringValue *colors;
331   DM_DA           *dd = (DM_DA *)da->data;
332 
333   PetscFunctionBegin;
334   /*
335          nc - number of components per grid point
336          col - number of colors needed in one direction for single component problem
337   */
338   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
339   col = 2 * s + 1;
340   PetscCheck(bx != DM_BOUNDARY_PERIODIC || (m % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in X is divisible by 2*stencil_width + 1");
341   PetscCheck(by != DM_BOUNDARY_PERIODIC || (n % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Y is divisible by 2*stencil_width + 1");
342   PetscCheck(bz != DM_BOUNDARY_PERIODIC || (p % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Z is divisible by 2*stencil_width + 1");
343 
344   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
345   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
346   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
347 
348   /* create the coloring */
349   if (ctype == IS_COLORING_GLOBAL) {
350     if (!dd->localcoloring) {
351       PetscCall(PetscMalloc1(nc * nx * ny * nz, &colors));
352       ii = 0;
353       for (k = zs; k < zs + nz; k++) {
354         for (j = ys; j < ys + ny; j++) {
355           for (i = xs; i < xs + nx; i++) {
356             for (l = 0; l < nc; l++) PetscCall(ISColoringValueCast(l + nc * ((i % col) + col * (j % col) + col * col * (k % col)), colors + ii++));
357           }
358         }
359       }
360       ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
361       PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny * nz, colors, PETSC_OWN_POINTER, &dd->localcoloring));
362     }
363     *coloring = dd->localcoloring;
364   } else if (ctype == IS_COLORING_LOCAL) {
365     if (!dd->ghostedcoloring) {
366       PetscCall(PetscMalloc1(nc * gnx * gny * gnz, &colors));
367       ii = 0;
368       for (k = gzs; k < gzs + gnz; k++) {
369         for (j = gys; j < gys + gny; j++) {
370           for (i = gxs; i < gxs + gnx; i++) {
371             for (l = 0; l < nc; l++) {
372               /* the complicated stuff is to handle periodic boundaries */
373               PetscCall(ISColoringValueCast(l + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col) + col * col * (SetInRange(k, p) % col)), colors + ii++));
374             }
375           }
376         }
377       }
378       ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
379       PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny * gnz, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
380       PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
381     }
382     *coloring = dd->ghostedcoloring;
383   } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
384   PetscCall(ISColoringReference(*coloring));
385   PetscFunctionReturn(PETSC_SUCCESS);
386 }
387 
388 PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
389 {
390   PetscInt         xs, nx, i, i1, gxs, gnx, l, m, M, dim, s, nc, col;
391   PetscInt         ncolors;
392   MPI_Comm         comm;
393   DMBoundaryType   bx;
394   ISColoringValue *colors;
395   DM_DA           *dd = (DM_DA *)da->data;
396 
397   PetscFunctionBegin;
398   /*
399          nc - number of components per grid point
400          col - number of colors needed in one direction for single component problem
401   */
402   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, &M, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
403   col = 2 * s + 1;
404   PetscCheck(bx != DM_BOUNDARY_PERIODIC || !(m % col), PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "IS_COLORING_GLOBAL can only be used for periodic boundary conditions if the number of grid points %" PetscInt_FMT " is divisible by the number of colors %" PetscInt_FMT, m, col);
405   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
406   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
407   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
408 
409   /* create the coloring */
410   if (ctype == IS_COLORING_GLOBAL) {
411     if (!dd->localcoloring) {
412       PetscCall(PetscMalloc1(nc * nx, &colors));
413       if (dd->ofillcols) {
414         PetscInt tc = 0;
415         for (i = 0; i < nc; i++) tc += (PetscInt)(dd->ofillcols[i] > 0);
416         i1 = 0;
417         for (i = xs; i < xs + nx; i++) {
418           for (l = 0; l < nc; l++) {
419             if (dd->ofillcols[l] && (i % col)) {
420               PetscCall(ISColoringValueCast(nc - 1 + tc * ((i % col) - 1) + dd->ofillcols[l], colors + i1++));
421             } else {
422               PetscCall(ISColoringValueCast(l, colors + i1++));
423             }
424           }
425         }
426         ncolors = nc + 2 * s * tc;
427       } else {
428         i1 = 0;
429         for (i = xs; i < xs + nx; i++) {
430           for (l = 0; l < nc; l++) PetscCall(ISColoringValueCast(l + nc * (i % col), colors + i1++));
431         }
432         ncolors = nc + nc * (col - 1);
433       }
434       PetscCall(ISColoringCreate(comm, ncolors, nc * nx, colors, PETSC_OWN_POINTER, &dd->localcoloring));
435     }
436     *coloring = dd->localcoloring;
437   } else if (ctype == IS_COLORING_LOCAL) {
438     if (!dd->ghostedcoloring) {
439       PetscCall(PetscMalloc1(nc * gnx, &colors));
440       i1 = 0;
441       for (i = gxs; i < gxs + gnx; i++) {
442         for (l = 0; l < nc; l++) {
443           /* the complicated stuff is to handle periodic boundaries */
444           PetscCall(ISColoringValueCast(l + nc * (SetInRange(i, m) % col), colors + i1++));
445         }
446       }
447       ncolors = nc + nc * (col - 1);
448       PetscCall(ISColoringCreate(comm, ncolors, nc * gnx, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
449       PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
450     }
451     *coloring = dd->ghostedcoloring;
452   } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
453   PetscCall(ISColoringReference(*coloring));
454   PetscFunctionReturn(PETSC_SUCCESS);
455 }
456 
457 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
458 {
459   PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, dim, s, k, nc;
460   PetscInt         ncolors;
461   MPI_Comm         comm;
462   DMBoundaryType   bx, by;
463   ISColoringValue *colors;
464   DM_DA           *dd = (DM_DA *)da->data;
465 
466   PetscFunctionBegin;
467   /*
468          nc - number of components per grid point
469          col - number of colors needed in one direction for single component problem
470   */
471   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, NULL));
472   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
473   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
474   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
475   /* create the coloring */
476   if (ctype == IS_COLORING_GLOBAL) {
477     if (!dd->localcoloring) {
478       PetscCall(PetscMalloc1(nc * nx * ny, &colors));
479       ii = 0;
480       for (j = ys; j < ys + ny; j++) {
481         for (i = xs; i < xs + nx; i++) {
482           for (k = 0; k < nc; k++) PetscCall(ISColoringValueCast(k + nc * ((3 * j + i) % 5), colors + ii++));
483         }
484       }
485       ncolors = 5 * nc;
486       PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
487     }
488     *coloring = dd->localcoloring;
489   } else if (ctype == IS_COLORING_LOCAL) {
490     if (!dd->ghostedcoloring) {
491       PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
492       ii = 0;
493       for (j = gys; j < gys + gny; j++) {
494         for (i = gxs; i < gxs + gnx; i++) {
495           for (k = 0; k < nc; k++) PetscCall(ISColoringValueCast(k + nc * ((3 * SetInRange(j, n) + SetInRange(i, m)) % 5), colors + ii++));
496         }
497       }
498       ncolors = 5 * nc;
499       PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
500       PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
501     }
502     *coloring = dd->ghostedcoloring;
503   } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
504   PetscFunctionReturn(PETSC_SUCCESS);
505 }
506 
507 /* =========================================================================== */
508 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM, Mat);
509 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM, Mat);
510 extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM, Mat);
511 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM, Mat);
512 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM, Mat);
513 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM, Mat);
514 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM, Mat);
515 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM, Mat);
516 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM, Mat);
517 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM, Mat);
518 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM, Mat);
519 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM, Mat);
520 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM, Mat);
521 extern PetscErrorCode DMCreateMatrix_DA_IS(DM, Mat);
522 
523 static PetscErrorCode MatView_MPI_DA(Mat A, PetscViewer viewer)
524 {
525   DM                da;
526   const char       *prefix;
527   Mat               AA, Anatural;
528   AO                ao;
529   PetscInt          rstart, rend, *petsc, i;
530   IS                is;
531   MPI_Comm          comm;
532   PetscViewerFormat format;
533   PetscBool         flag;
534 
535   PetscFunctionBegin;
536   /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
537   PetscCall(PetscViewerGetFormat(viewer, &format));
538   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(PETSC_SUCCESS);
539 
540   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
541   PetscCall(MatGetDM(A, &da));
542   PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");
543 
544   PetscCall(DMDAGetAO(da, &ao));
545   PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
546   PetscCall(PetscMalloc1(rend - rstart, &petsc));
547   for (i = rstart; i < rend; i++) petsc[i - rstart] = i;
548   PetscCall(AOApplicationToPetsc(ao, rend - rstart, petsc));
549   PetscCall(ISCreateGeneral(comm, rend - rstart, petsc, PETSC_OWN_POINTER, &is));
550 
551   /* call viewer on natural ordering */
552   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPISELL, &flag));
553   if (flag) {
554     PetscCall(MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &AA));
555     A = AA;
556   }
557   PetscCall(MatCreateSubMatrix(A, is, is, MAT_INITIAL_MATRIX, &Anatural));
558   PetscCall(ISDestroy(&is));
559   PetscCall(PetscObjectGetOptionsPrefix((PetscObject)A, &prefix));
560   PetscCall(PetscObjectSetOptionsPrefix((PetscObject)Anatural, prefix));
561   PetscCall(PetscObjectSetName((PetscObject)Anatural, ((PetscObject)A)->name));
562   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
563   PetscCall(MatView(Anatural, viewer));
564   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
565   PetscCall(MatDestroy(&Anatural));
566   if (flag) PetscCall(MatDestroy(&AA));
567   PetscFunctionReturn(PETSC_SUCCESS);
568 }
569 
570 static PetscErrorCode MatLoad_MPI_DA(Mat A, PetscViewer viewer)
571 {
572   DM       da;
573   Mat      Anatural, Aapp;
574   AO       ao;
575   PetscInt rstart, rend, *app, i, m, n, M, N;
576   IS       is;
577   MPI_Comm comm;
578 
579   PetscFunctionBegin;
580   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
581   PetscCall(MatGetDM(A, &da));
582   PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");
583 
584   /* Load the matrix in natural ordering */
585   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Anatural));
586   PetscCall(MatSetType(Anatural, ((PetscObject)A)->type_name));
587   PetscCall(MatGetSize(A, &M, &N));
588   PetscCall(MatGetLocalSize(A, &m, &n));
589   PetscCall(MatSetSizes(Anatural, m, n, M, N));
590   PetscCall(MatLoad(Anatural, viewer));
591 
592   /* Map natural ordering to application ordering and create IS */
593   PetscCall(DMDAGetAO(da, &ao));
594   PetscCall(MatGetOwnershipRange(Anatural, &rstart, &rend));
595   PetscCall(PetscMalloc1(rend - rstart, &app));
596   for (i = rstart; i < rend; i++) app[i - rstart] = i;
597   PetscCall(AOPetscToApplication(ao, rend - rstart, app));
598   PetscCall(ISCreateGeneral(comm, rend - rstart, app, PETSC_OWN_POINTER, &is));
599 
600   /* Do permutation and replace header */
601   PetscCall(MatCreateSubMatrix(Anatural, is, is, MAT_INITIAL_MATRIX, &Aapp));
602   PetscCall(MatHeaderReplace(A, &Aapp));
603   PetscCall(ISDestroy(&is));
604   PetscCall(MatDestroy(&Anatural));
605   PetscFunctionReturn(PETSC_SUCCESS);
606 }
607 
608 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
609 {
610   PetscInt    dim, dof, nx, ny, nz, dims[3], starts[3], M, N, P;
611   Mat         A;
612   MPI_Comm    comm;
613   MatType     Atype;
614   MatType     mtype;
615   PetscMPIInt size;
616   DM_DA      *dd  = (DM_DA *)da->data;
617   PetscBool   aij = PETSC_FALSE, baij = PETSC_FALSE, sbaij = PETSC_FALSE, sell = PETSC_FALSE, is = PETSC_FALSE;
618 
619   PetscFunctionBegin;
620   PetscCall(MatInitializePackage());
621   mtype = da->mattype;
622 
623   /*
624                                   m
625           ------------------------------------------------------
626          |                                                     |
627          |                                                     |
628          |               ----------------------                |
629          |               |                    |                |
630       n  |           ny  |                    |                |
631          |               |                    |                |
632          |               .---------------------                |
633          |             (xs,ys)     nx                          |
634          |            .                                        |
635          |         (gxs,gys)                                   |
636          |                                                     |
637           -----------------------------------------------------
638   */
639 
640   /*
641          nc - number of components per grid point
642          col - number of colors needed in one direction for single component problem
643   */
644   M   = dd->M;
645   N   = dd->N;
646   P   = dd->P;
647   dim = da->dim;
648   dof = dd->w;
649   /* PetscCall(DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); */
650   PetscCall(DMDAGetCorners(da, NULL, NULL, NULL, &nx, &ny, &nz));
651   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
652   PetscCall(MatCreate(comm, &A));
653   PetscCall(MatSetSizes(A, dof * nx * ny * nz, dof * nx * ny * nz, dof * M * N * P, dof * M * N * P));
654   PetscCall(MatSetType(A, mtype));
655   PetscCall(MatSetFromOptions(A));
656   if (dof * nx * ny * nz < da->bind_below) {
657     PetscCall(MatSetBindingPropagates(A, PETSC_TRUE));
658     PetscCall(MatBindToCPU(A, PETSC_TRUE));
659   }
660   PetscCall(MatSetDM(A, da));
661   if (da->structure_only) PetscCall(MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE));
662   PetscCall(MatGetType(A, &Atype));
663   /*
664      We do not provide a getmatrix function in the DMDA operations because
665    the basic DMDA does not know about matrices. We think of DMDA as being more
666    more low-level than matrices. This is kind of cheating but, cause sometimes
667    we think of DMDA has higher level than matrices.
668 
669      We could switch based on Atype (or mtype), but we do not since the
670    specialized setting routines depend only on the particular preallocation
671    details of the matrix, not the type itself.
672   */
673   if (!da->prealloc_skip) { // Flag is likely set when user intends to use MatSetPreallocationCOO()
674     PetscCall(PetscObjectHasFunction((PetscObject)A, "MatMPIAIJSetPreallocation_C", &aij));
675     if (!aij) PetscCall(PetscObjectHasFunction((PetscObject)A, "MatSeqAIJSetPreallocation_C", &aij));
676     if (!aij) {
677       PetscCall(PetscObjectHasFunction((PetscObject)A, "MatMPIBAIJSetPreallocation_C", &baij));
678       if (!baij) PetscCall(PetscObjectHasFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", &baij));
679       if (!baij) {
680         PetscCall(PetscObjectHasFunction((PetscObject)A, "MatMPISBAIJSetPreallocation_C", &sbaij));
681         if (!sbaij) PetscCall(PetscObjectHasFunction((PetscObject)A, "MatSeqSBAIJSetPreallocation_C", &sbaij));
682         if (!sbaij) {
683           PetscCall(PetscObjectHasFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", &sell));
684           if (!sell) PetscCall(PetscObjectHasFunction((PetscObject)A, "MatSeqSELLSetPreallocation_C", &sell));
685         }
686         if (!sell) PetscCall(PetscObjectHasFunction((PetscObject)A, "MatISSetPreallocation_C", &is));
687       }
688     }
689   }
690   if (aij) {
691     if (dim == 1) {
692       if (dd->ofill) {
693         PetscCall(DMCreateMatrix_DA_1d_MPIAIJ_Fill(da, A));
694       } else {
695         DMBoundaryType bx;
696         PetscMPIInt    size;
697         PetscCall(DMDAGetInfo(da, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &bx, NULL, NULL, NULL));
698         PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));
699         if (size == 1 && bx == DM_BOUNDARY_NONE) {
700           PetscCall(DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da, A));
701         } else {
702           PetscCall(DMCreateMatrix_DA_1d_MPIAIJ(da, A));
703         }
704       }
705     } else if (dim == 2) {
706       if (dd->ofill) {
707         PetscCall(DMCreateMatrix_DA_2d_MPIAIJ_Fill(da, A));
708       } else {
709         PetscCall(DMCreateMatrix_DA_2d_MPIAIJ(da, A));
710       }
711     } else if (dim == 3) {
712       if (dd->ofill) {
713         PetscCall(DMCreateMatrix_DA_3d_MPIAIJ_Fill(da, A));
714       } else {
715         PetscCall(DMCreateMatrix_DA_3d_MPIAIJ(da, A));
716       }
717     }
718   } else if (baij) {
719     if (dim == 2) {
720       PetscCall(DMCreateMatrix_DA_2d_MPIBAIJ(da, A));
721     } else if (dim == 3) {
722       PetscCall(DMCreateMatrix_DA_3d_MPIBAIJ(da, A));
723     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
724   } else if (sbaij) {
725     if (dim == 2) {
726       PetscCall(DMCreateMatrix_DA_2d_MPISBAIJ(da, A));
727     } else if (dim == 3) {
728       PetscCall(DMCreateMatrix_DA_3d_MPISBAIJ(da, A));
729     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
730   } else if (sell) {
731     if (dim == 2) {
732       PetscCall(DMCreateMatrix_DA_2d_MPISELL(da, A));
733     } else if (dim == 3) {
734       PetscCall(DMCreateMatrix_DA_3d_MPISELL(da, A));
735     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
736   } else if (is) {
737     PetscCall(DMCreateMatrix_DA_IS(da, A));
738   } else { // unknown type or da->prealloc_skip so structural information may be needed, but no prealloc
739     ISLocalToGlobalMapping ltog;
740 
741     PetscCall(MatSetBlockSize(A, dof));
742     PetscCall(MatSetUp(A));
743     PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
744     PetscCall(MatSetLocalToGlobalMapping(A, ltog, ltog));
745   }
746   PetscCall(DMDAGetGhostCorners(da, &starts[0], &starts[1], &starts[2], &dims[0], &dims[1], &dims[2]));
747   PetscCall(MatSetStencil(A, dim, dims, starts, dof));
748   PetscCall(MatSetDM(A, da));
749   PetscCallMPI(MPI_Comm_size(comm, &size));
750   if (size > 1) {
751     /* change viewer to display matrix in natural ordering */
752     PetscCall(MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA));
753     PetscCall(MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA));
754   }
755   *J = A;
756   PetscFunctionReturn(PETSC_SUCCESS);
757 }
758 
759 PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]);
760 
761 PetscErrorCode DMCreateMatrix_DA_IS(DM dm, Mat J)
762 {
763   DM_DA                 *da = (DM_DA *)dm->data;
764   Mat                    lJ, P;
765   ISLocalToGlobalMapping ltog;
766   IS                     is;
767   PetscBT                bt;
768   const PetscInt        *e_loc, *idx;
769   PetscInt               i, nel, nen, nv, dof, *gidx, n, N;
770 
771   /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
772      We need to filter out the local indices that are not represented through the DMDAGetElements decomposition */
773   PetscFunctionBegin;
774   dof = da->w;
775   PetscCall(MatSetBlockSize(J, dof));
776   PetscCall(DMGetLocalToGlobalMapping(dm, &ltog));
777 
778   /* flag local elements indices in local DMDA numbering */
779   PetscCall(ISLocalToGlobalMappingGetSize(ltog, &nv));
780   PetscCall(PetscBTCreate(nv / dof, &bt));
781   PetscCall(DMDAGetElements(dm, &nel, &nen, &e_loc)); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
782   for (i = 0; i < nel * nen; i++) PetscCall(PetscBTSet(bt, e_loc[i]));
783 
784   /* filter out (set to -1) the global indices not used by the local elements */
785   PetscCall(PetscMalloc1(nv / dof, &gidx));
786   PetscCall(ISLocalToGlobalMappingGetBlockIndices(ltog, &idx));
787   PetscCall(PetscArraycpy(gidx, idx, nv / dof));
788   PetscCall(ISLocalToGlobalMappingRestoreBlockIndices(ltog, &idx));
789   for (i = 0; i < nv / dof; i++)
790     if (!PetscBTLookup(bt, i)) gidx[i] = -1;
791   PetscCall(PetscBTDestroy(&bt));
792   PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)dm), dof, nv / dof, gidx, PETSC_OWN_POINTER, &is));
793   PetscCall(ISLocalToGlobalMappingCreateIS(is, &ltog));
794   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
795   PetscCall(ISLocalToGlobalMappingDestroy(&ltog));
796   PetscCall(ISDestroy(&is));
797 
798   /* Preallocation */
799   if (dm->prealloc_skip) {
800     PetscCall(MatSetUp(J));
801   } else {
802     PetscCall(MatISGetLocalMat(J, &lJ));
803     PetscCall(MatGetLocalToGlobalMapping(lJ, &ltog, NULL));
804     PetscCall(MatCreate(PetscObjectComm((PetscObject)lJ), &P));
805     PetscCall(MatSetType(P, MATPREALLOCATOR));
806     PetscCall(MatSetLocalToGlobalMapping(P, ltog, ltog));
807     PetscCall(MatGetSize(lJ, &N, NULL));
808     PetscCall(MatGetLocalSize(lJ, &n, NULL));
809     PetscCall(MatSetSizes(P, n, n, N, N));
810     PetscCall(MatSetBlockSize(P, dof));
811     PetscCall(MatSetUp(P));
812     for (i = 0; i < nel; i++) PetscCall(MatSetValuesBlockedLocal(P, nen, e_loc + i * nen, nen, e_loc + i * nen, NULL, INSERT_VALUES));
813     PetscCall(MatPreallocatorPreallocate(P, (PetscBool)!da->prealloc_only, lJ));
814     PetscCall(MatISRestoreLocalMat(J, &lJ));
815     PetscCall(DMDARestoreElements(dm, &nel, &nen, &e_loc));
816     PetscCall(MatDestroy(&P));
817 
818     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
819     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
820   }
821   PetscFunctionReturn(PETSC_SUCCESS);
822 }
823 
824 PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da, Mat J)
825 {
826   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p;
827   PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
828   MPI_Comm               comm;
829   PetscScalar           *values;
830   DMBoundaryType         bx, by;
831   ISLocalToGlobalMapping ltog;
832   DMDAStencilType        st;
833 
834   PetscFunctionBegin;
835   /*
836          nc - number of components per grid point
837          col - number of colors needed in one direction for single component problem
838   */
839   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
840   col = 2 * s + 1;
841   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
842   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
843   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
844 
845   PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
846   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
847 
848   PetscCall(MatSetBlockSize(J, nc));
849   /* determine the matrix preallocation information */
850   MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
851   for (i = xs; i < xs + nx; i++) {
852     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
853     pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
854 
855     for (j = ys; j < ys + ny; j++) {
856       slot = i - gxs + gnx * (j - gys);
857 
858       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
859       lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
860 
861       cnt = 0;
862       for (k = 0; k < nc; k++) {
863         for (l = lstart; l < lend + 1; l++) {
864           for (p = pstart; p < pend + 1; p++) {
865             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
866               cols[cnt++] = k + nc * (slot + gnx * l + p);
867             }
868           }
869         }
870         rows[k] = k + nc * (slot);
871       }
872       PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
873     }
874   }
875   PetscCall(MatSetBlockSize(J, nc));
876   PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
877   PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
878   MatPreallocateEnd(dnz, onz);
879 
880   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
881 
882   /*
883     For each node in the grid: we get the neighbors in the local (on processor ordering
884     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
885     PETSc ordering.
886   */
887   if (!da->prealloc_only) {
888     PetscCall(PetscCalloc1(col * col * nc * nc, &values));
889     for (i = xs; i < xs + nx; i++) {
890       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
891       pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
892 
893       for (j = ys; j < ys + ny; j++) {
894         slot = i - gxs + gnx * (j - gys);
895 
896         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
897         lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
898 
899         cnt = 0;
900         for (k = 0; k < nc; k++) {
901           for (l = lstart; l < lend + 1; l++) {
902             for (p = pstart; p < pend + 1; p++) {
903               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
904                 cols[cnt++] = k + nc * (slot + gnx * l + p);
905               }
906             }
907           }
908           rows[k] = k + nc * (slot);
909         }
910         PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
911       }
912     }
913     PetscCall(PetscFree(values));
914     /* do not copy values to GPU since they are all zero and not yet needed there */
915     PetscCall(MatBindToCPU(J, PETSC_TRUE));
916     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
917     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
918     PetscCall(MatBindToCPU(J, PETSC_FALSE));
919     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
920   }
921   PetscCall(PetscFree2(rows, cols));
922   PetscFunctionReturn(PETSC_SUCCESS);
923 }
924 
925 PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da, Mat J)
926 {
927   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
928   PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
929   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
930   MPI_Comm               comm;
931   PetscScalar           *values;
932   DMBoundaryType         bx, by, bz;
933   ISLocalToGlobalMapping ltog;
934   DMDAStencilType        st;
935 
936   PetscFunctionBegin;
937   /*
938          nc - number of components per grid point
939          col - number of colors needed in one direction for single component problem
940   */
941   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
942   col = 2 * s + 1;
943   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
944   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
945   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
946 
947   PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
948   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
949 
950   PetscCall(MatSetBlockSize(J, nc));
951   /* determine the matrix preallocation information */
952   MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
953   for (i = xs; i < xs + nx; i++) {
954     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
955     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
956     for (j = ys; j < ys + ny; j++) {
957       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
958       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
959       for (k = zs; k < zs + nz; k++) {
960         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
961         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
962 
963         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
964 
965         cnt = 0;
966         for (l = 0; l < nc; l++) {
967           for (ii = istart; ii < iend + 1; ii++) {
968             for (jj = jstart; jj < jend + 1; jj++) {
969               for (kk = kstart; kk < kend + 1; kk++) {
970                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
971                   cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
972                 }
973               }
974             }
975           }
976           rows[l] = l + nc * (slot);
977         }
978         PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
979       }
980     }
981   }
982   PetscCall(MatSetBlockSize(J, nc));
983   PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
984   PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
985   MatPreallocateEnd(dnz, onz);
986   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
987 
988   /*
989     For each node in the grid: we get the neighbors in the local (on processor ordering
990     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
991     PETSc ordering.
992   */
993   if (!da->prealloc_only) {
994     PetscCall(PetscCalloc1(col * col * col * nc * nc * nc, &values));
995     for (i = xs; i < xs + nx; i++) {
996       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
997       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
998       for (j = ys; j < ys + ny; j++) {
999         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1000         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1001         for (k = zs; k < zs + nz; k++) {
1002           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1003           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1004 
1005           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1006 
1007           cnt = 0;
1008           for (l = 0; l < nc; l++) {
1009             for (ii = istart; ii < iend + 1; ii++) {
1010               for (jj = jstart; jj < jend + 1; jj++) {
1011                 for (kk = kstart; kk < kend + 1; kk++) {
1012                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1013                     cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
1014                   }
1015                 }
1016               }
1017             }
1018             rows[l] = l + nc * (slot);
1019           }
1020           PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
1021         }
1022       }
1023     }
1024     PetscCall(PetscFree(values));
1025     /* do not copy values to GPU since they are all zero and not yet needed there */
1026     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1027     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1028     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1029     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1030     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1031   }
1032   PetscCall(PetscFree2(rows, cols));
1033   PetscFunctionReturn(PETSC_SUCCESS);
1034 }
1035 
1036 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da, Mat J)
1037 {
1038   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, M, N;
1039   PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
1040   MPI_Comm               comm;
1041   DMBoundaryType         bx, by;
1042   ISLocalToGlobalMapping ltog, mltog;
1043   DMDAStencilType        st;
1044   PetscBool              removedups = PETSC_FALSE, alreadyboundtocpu = PETSC_TRUE;
1045 
1046   PetscFunctionBegin;
1047   /*
1048          nc - number of components per grid point
1049          col - number of colors needed in one direction for single component problem
1050   */
1051   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
1052   if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1053   col = 2 * s + 1;
1054   /*
1055        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1056        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1057   */
1058   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1059   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1060   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1061   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1062   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1063 
1064   PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
1065   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1066 
1067   PetscCall(MatSetBlockSize(J, nc));
1068   /* determine the matrix preallocation information */
1069   MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
1070   for (i = xs; i < xs + nx; i++) {
1071     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1072     pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1073 
1074     for (j = ys; j < ys + ny; j++) {
1075       slot = i - gxs + gnx * (j - gys);
1076 
1077       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1078       lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1079 
1080       cnt = 0;
1081       for (k = 0; k < nc; k++) {
1082         for (l = lstart; l < lend + 1; l++) {
1083           for (p = pstart; p < pend + 1; p++) {
1084             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
1085               cols[cnt++] = k + nc * (slot + gnx * l + p);
1086             }
1087           }
1088         }
1089         rows[k] = k + nc * (slot);
1090       }
1091       if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1092       else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1093     }
1094   }
1095   PetscCall(MatSetBlockSize(J, nc));
1096   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
1097   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
1098   MatPreallocateEnd(dnz, onz);
1099   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1100   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1101 
1102   /*
1103     For each node in the grid: we get the neighbors in the local (on processor ordering
1104     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1105     PETSc ordering.
1106   */
1107   if (!da->prealloc_only) {
1108     for (i = xs; i < xs + nx; i++) {
1109       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1110       pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1111 
1112       for (j = ys; j < ys + ny; j++) {
1113         slot = i - gxs + gnx * (j - gys);
1114 
1115         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1116         lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1117 
1118         cnt = 0;
1119         for (l = lstart; l < lend + 1; l++) {
1120           for (p = pstart; p < pend + 1; p++) {
1121             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
1122               cols[cnt++] = nc * (slot + gnx * l + p);
1123               for (k = 1; k < nc; k++) {
1124                 cols[cnt] = 1 + cols[cnt - 1];
1125                 cnt++;
1126               }
1127             }
1128           }
1129         }
1130         for (k = 0; k < nc; k++) rows[k] = k + nc * (slot);
1131         PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1132       }
1133     }
1134     /* do not copy values to GPU since they are all zero and not yet needed there */
1135     PetscCall(MatBoundToCPU(J, &alreadyboundtocpu));
1136     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1137     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1138     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1139     if (!alreadyboundtocpu) PetscCall(MatBindToCPU(J, PETSC_FALSE));
1140     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1141     if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1142   }
1143   PetscCall(PetscFree2(rows, cols));
1144   PetscFunctionReturn(PETSC_SUCCESS);
1145 }
1146 
1147 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da, Mat J)
1148 {
1149   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1150   PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, M, N;
1151   PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
1152   DM_DA                 *dd = (DM_DA *)da->data;
1153   PetscInt               ifill_col, *ofill = dd->ofill, *dfill = dd->dfill;
1154   MPI_Comm               comm;
1155   DMBoundaryType         bx, by;
1156   ISLocalToGlobalMapping ltog;
1157   DMDAStencilType        st;
1158   PetscBool              removedups = PETSC_FALSE;
1159 
1160   PetscFunctionBegin;
1161   /*
1162          nc - number of components per grid point
1163          col - number of colors needed in one direction for single component problem
1164   */
1165   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
1166   col = 2 * s + 1;
1167   /*
1168        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1169        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1170   */
1171   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1172   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1173   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1174   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1175   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1176 
1177   PetscCall(PetscMalloc1(col * col * nc, &cols));
1178   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1179 
1180   PetscCall(MatSetBlockSize(J, nc));
1181   /* determine the matrix preallocation information */
1182   MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
1183   for (i = xs; i < xs + nx; i++) {
1184     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1185     pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1186 
1187     for (j = ys; j < ys + ny; j++) {
1188       slot = i - gxs + gnx * (j - gys);
1189 
1190       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1191       lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1192 
1193       for (k = 0; k < nc; k++) {
1194         cnt = 0;
1195         for (l = lstart; l < lend + 1; l++) {
1196           for (p = pstart; p < pend + 1; p++) {
1197             if (l || p) {
1198               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
1199                 for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
1200               }
1201             } else {
1202               if (dfill) {
1203                 for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
1204               } else {
1205                 for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
1206               }
1207             }
1208           }
1209         }
1210         row    = k + nc * (slot);
1211         maxcnt = PetscMax(maxcnt, cnt);
1212         if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
1213         else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
1214       }
1215     }
1216   }
1217   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
1218   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
1219   MatPreallocateEnd(dnz, onz);
1220   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1221 
1222   /*
1223     For each node in the grid: we get the neighbors in the local (on processor ordering
1224     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1225     PETSc ordering.
1226   */
1227   if (!da->prealloc_only) {
1228     for (i = xs; i < xs + nx; i++) {
1229       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1230       pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1231 
1232       for (j = ys; j < ys + ny; j++) {
1233         slot = i - gxs + gnx * (j - gys);
1234 
1235         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1236         lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1237 
1238         for (k = 0; k < nc; k++) {
1239           cnt = 0;
1240           for (l = lstart; l < lend + 1; l++) {
1241             for (p = pstart; p < pend + 1; p++) {
1242               if (l || p) {
1243                 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
1244                   for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
1245                 }
1246               } else {
1247                 if (dfill) {
1248                   for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
1249                 } else {
1250                   for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
1251                 }
1252               }
1253             }
1254           }
1255           row = k + nc * (slot);
1256           PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1257         }
1258       }
1259     }
1260     /* do not copy values to GPU since they are all zero and not yet needed there */
1261     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1262     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1263     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1264     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1265     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1266   }
1267   PetscCall(PetscFree(cols));
1268   PetscFunctionReturn(PETSC_SUCCESS);
1269 }
1270 
1271 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da, Mat J)
1272 {
1273   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1274   PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
1275   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
1276   MPI_Comm               comm;
1277   DMBoundaryType         bx, by, bz;
1278   ISLocalToGlobalMapping ltog, mltog;
1279   DMDAStencilType        st;
1280   PetscBool              removedups = PETSC_FALSE;
1281 
1282   PetscFunctionBegin;
1283   /*
1284          nc - number of components per grid point
1285          col - number of colors needed in one direction for single component problem
1286   */
1287   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
1288   if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1289   col = 2 * s + 1;
1290 
1291   /*
1292        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1293        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1294   */
1295   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1296   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1297   if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;
1298 
1299   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
1300   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
1301   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1302 
1303   PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
1304   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1305 
1306   PetscCall(MatSetBlockSize(J, nc));
1307   /* determine the matrix preallocation information */
1308   MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
1309   for (i = xs; i < xs + nx; i++) {
1310     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1311     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1312     for (j = ys; j < ys + ny; j++) {
1313       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1314       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1315       for (k = zs; k < zs + nz; k++) {
1316         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1317         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1318 
1319         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1320 
1321         cnt = 0;
1322         for (l = 0; l < nc; l++) {
1323           for (ii = istart; ii < iend + 1; ii++) {
1324             for (jj = jstart; jj < jend + 1; jj++) {
1325               for (kk = kstart; kk < kend + 1; kk++) {
1326                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1327                   cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
1328                 }
1329               }
1330             }
1331           }
1332           rows[l] = l + nc * (slot);
1333         }
1334         if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1335         else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1336       }
1337     }
1338   }
1339   PetscCall(MatSetBlockSize(J, nc));
1340   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
1341   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
1342   MatPreallocateEnd(dnz, onz);
1343   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1344   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1345 
1346   /*
1347     For each node in the grid: we get the neighbors in the local (on processor ordering
1348     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1349     PETSc ordering.
1350   */
1351   if (!da->prealloc_only) {
1352     for (i = xs; i < xs + nx; i++) {
1353       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1354       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1355       for (j = ys; j < ys + ny; j++) {
1356         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1357         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1358         for (k = zs; k < zs + nz; k++) {
1359           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1360           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1361 
1362           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1363 
1364           cnt = 0;
1365           for (kk = kstart; kk < kend + 1; kk++) {
1366             for (jj = jstart; jj < jend + 1; jj++) {
1367               for (ii = istart; ii < iend + 1; ii++) {
1368                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1369                   cols[cnt++] = nc * (slot + ii + gnx * jj + gnx * gny * kk);
1370                   for (l = 1; l < nc; l++) {
1371                     cols[cnt] = 1 + cols[cnt - 1];
1372                     cnt++;
1373                   }
1374                 }
1375               }
1376             }
1377           }
1378           rows[0] = nc * (slot);
1379           for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1380           PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1381         }
1382       }
1383     }
1384     /* do not copy values to GPU since they are all zero and not yet needed there */
1385     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1386     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1387     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1388     if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1389     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1390     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1391   }
1392   PetscCall(PetscFree2(rows, cols));
1393   PetscFunctionReturn(PETSC_SUCCESS);
1394 }
1395 
1396 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da, Mat J)
1397 {
1398   DM_DA                 *dd = (DM_DA *)da->data;
1399   PetscInt               xs, nx, i, j, gxs, gnx, row, k, l;
1400   PetscInt               m, dim, s, *cols = NULL, nc, cnt, maxcnt = 0, *ocols;
1401   PetscInt              *ofill = dd->ofill, *dfill = dd->dfill;
1402   DMBoundaryType         bx;
1403   ISLocalToGlobalMapping ltog;
1404   PetscMPIInt            rank, size;
1405 
1406   PetscFunctionBegin;
1407   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)da), &rank));
1408   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));
1409 
1410   /*
1411          nc - number of components per grid point
1412   */
1413   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1414   PetscCheck(s <= 1, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Matrix creation for 1d not implemented correctly for stencil width larger than 1");
1415   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1416   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1417 
1418   PetscCall(MatSetBlockSize(J, nc));
1419   PetscCall(PetscCalloc2(nx * nc, &cols, nx * nc, &ocols));
1420 
1421   /*
1422         note should be smaller for first and last process with no periodic
1423         does not handle dfill
1424   */
1425   cnt = 0;
1426   /* coupling with process to the left */
1427   for (i = 0; i < s; i++) {
1428     for (j = 0; j < nc; j++) {
1429       ocols[cnt] = ((rank == 0) ? 0 : (s - i) * (ofill[j + 1] - ofill[j]));
1430       cols[cnt]  = dfill[j + 1] - dfill[j] + (s + i) * (ofill[j + 1] - ofill[j]);
1431       if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1432         if (size > 1) ocols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
1433         else cols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
1434       }
1435       maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1436       cnt++;
1437     }
1438   }
1439   for (i = s; i < nx - s; i++) {
1440     for (j = 0; j < nc; j++) {
1441       cols[cnt] = dfill[j + 1] - dfill[j] + 2 * s * (ofill[j + 1] - ofill[j]);
1442       maxcnt    = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1443       cnt++;
1444     }
1445   }
1446   /* coupling with process to the right */
1447   for (i = nx - s; i < nx; i++) {
1448     for (j = 0; j < nc; j++) {
1449       ocols[cnt] = ((rank == (size - 1)) ? 0 : (i - nx + s + 1) * (ofill[j + 1] - ofill[j]));
1450       cols[cnt]  = dfill[j + 1] - dfill[j] + (s + nx - i - 1) * (ofill[j + 1] - ofill[j]);
1451       if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1452         if (size > 1) ocols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
1453         else cols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
1454       }
1455       maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1456       cnt++;
1457     }
1458   }
1459 
1460   PetscCall(MatSeqAIJSetPreallocation(J, 0, cols));
1461   PetscCall(MatMPIAIJSetPreallocation(J, 0, cols, 0, ocols));
1462   PetscCall(PetscFree2(cols, ocols));
1463 
1464   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1465   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1466 
1467   /*
1468     For each node in the grid: we get the neighbors in the local (on processor ordering
1469     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1470     PETSc ordering.
1471   */
1472   if (!da->prealloc_only) {
1473     PetscCall(PetscMalloc1(maxcnt, &cols));
1474     row = xs * nc;
1475     /* coupling with process to the left */
1476     for (i = xs; i < xs + s; i++) {
1477       for (j = 0; j < nc; j++) {
1478         cnt = 0;
1479         if (rank) {
1480           for (l = 0; l < s; l++) {
1481             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1482           }
1483         }
1484         if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1485           for (l = 0; l < s; l++) {
1486             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (m + i - s - l) * nc + ofill[k];
1487           }
1488         }
1489         if (dfill) {
1490           for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1491         } else {
1492           for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1493         }
1494         for (l = 0; l < s; l++) {
1495           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1496         }
1497         PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1498         row++;
1499       }
1500     }
1501     for (i = xs + s; i < xs + nx - s; i++) {
1502       for (j = 0; j < nc; j++) {
1503         cnt = 0;
1504         for (l = 0; l < s; l++) {
1505           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1506         }
1507         if (dfill) {
1508           for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1509         } else {
1510           for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1511         }
1512         for (l = 0; l < s; l++) {
1513           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1514         }
1515         PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1516         row++;
1517       }
1518     }
1519     /* coupling with process to the right */
1520     for (i = xs + nx - s; i < xs + nx; i++) {
1521       for (j = 0; j < nc; j++) {
1522         cnt = 0;
1523         for (l = 0; l < s; l++) {
1524           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1525         }
1526         if (dfill) {
1527           for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1528         } else {
1529           for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1530         }
1531         if (rank < size - 1) {
1532           for (l = 0; l < s; l++) {
1533             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1534           }
1535         }
1536         if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1537           for (l = 0; l < s; l++) {
1538             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s - l - m + 2) * nc + ofill[k];
1539           }
1540         }
1541         PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1542         row++;
1543       }
1544     }
1545     PetscCall(PetscFree(cols));
1546     /* do not copy values to GPU since they are all zero and not yet needed there */
1547     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1548     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1549     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1550     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1551     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1552   }
1553   PetscFunctionReturn(PETSC_SUCCESS);
1554 }
1555 
1556 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da, Mat J)
1557 {
1558   PetscInt               xs, nx, i, i1, slot, gxs, gnx;
1559   PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1560   PetscInt               istart, iend;
1561   DMBoundaryType         bx;
1562   ISLocalToGlobalMapping ltog, mltog;
1563 
1564   PetscFunctionBegin;
1565   /*
1566          nc - number of components per grid point
1567          col - number of colors needed in one direction for single component problem
1568   */
1569   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1570   if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1571   col = 2 * s + 1;
1572 
1573   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1574   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1575 
1576   PetscCall(MatSetBlockSize(J, nc));
1577   PetscCall(MatSeqAIJSetPreallocation(J, col * nc, NULL));
1578   PetscCall(MatMPIAIJSetPreallocation(J, col * nc, NULL, col * nc, NULL));
1579 
1580   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1581   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1582   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1583 
1584   /*
1585     For each node in the grid: we get the neighbors in the local (on processor ordering
1586     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1587     PETSc ordering.
1588   */
1589   if (!da->prealloc_only) {
1590     PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
1591     for (i = xs; i < xs + nx; i++) {
1592       istart = PetscMax(-s, gxs - i);
1593       iend   = PetscMin(s, gxs + gnx - i - 1);
1594       slot   = i - gxs;
1595 
1596       cnt = 0;
1597       for (i1 = istart; i1 < iend + 1; i1++) {
1598         cols[cnt++] = nc * (slot + i1);
1599         for (l = 1; l < nc; l++) {
1600           cols[cnt] = 1 + cols[cnt - 1];
1601           cnt++;
1602         }
1603       }
1604       rows[0] = nc * (slot);
1605       for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1606       PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1607     }
1608     /* do not copy values to GPU since they are all zero and not yet needed there */
1609     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1610     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1611     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1612     if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1613     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1614     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1615     PetscCall(PetscFree2(rows, cols));
1616   }
1617   PetscFunctionReturn(PETSC_SUCCESS);
1618 }
1619 
1620 PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da, Mat J)
1621 {
1622   PetscInt               xs, nx, i, i1, slot, gxs, gnx;
1623   PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1624   PetscInt               istart, iend;
1625   DMBoundaryType         bx;
1626   ISLocalToGlobalMapping ltog, mltog;
1627 
1628   PetscFunctionBegin;
1629   /*
1630          nc - number of components per grid point
1631          col - number of colors needed in one direction for single component problem
1632   */
1633   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1634   col = 2 * s + 1;
1635 
1636   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1637   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1638 
1639   PetscCall(MatSetBlockSize(J, nc));
1640   PetscCall(MatSeqAIJSetTotalPreallocation(J, nx * nc * col * nc));
1641 
1642   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1643   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1644   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1645 
1646   /*
1647     For each node in the grid: we get the neighbors in the local (on processor ordering
1648     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1649     PETSc ordering.
1650   */
1651   if (!da->prealloc_only) {
1652     PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
1653     for (i = xs; i < xs + nx; i++) {
1654       istart = PetscMax(-s, gxs - i);
1655       iend   = PetscMin(s, gxs + gnx - i - 1);
1656       slot   = i - gxs;
1657 
1658       cnt = 0;
1659       for (i1 = istart; i1 < iend + 1; i1++) {
1660         cols[cnt++] = nc * (slot + i1);
1661         for (l = 1; l < nc; l++) {
1662           cols[cnt] = 1 + cols[cnt - 1];
1663           cnt++;
1664         }
1665       }
1666       rows[0] = nc * (slot);
1667       for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1668       PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1669     }
1670     /* do not copy values to GPU since they are all zero and not yet needed there */
1671     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1672     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1673     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1674     if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1675     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1676     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1677     PetscCall(PetscFree2(rows, cols));
1678   }
1679   PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1680   PetscFunctionReturn(PETSC_SUCCESS);
1681 }
1682 
1683 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da, Mat J)
1684 {
1685   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1686   PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
1687   PetscInt               istart, iend, jstart, jend, ii, jj;
1688   MPI_Comm               comm;
1689   PetscScalar           *values;
1690   DMBoundaryType         bx, by;
1691   DMDAStencilType        st;
1692   ISLocalToGlobalMapping ltog;
1693 
1694   PetscFunctionBegin;
1695   /*
1696      nc - number of components per grid point
1697      col - number of colors needed in one direction for single component problem
1698   */
1699   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
1700   col = 2 * s + 1;
1701 
1702   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1703   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1704   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1705 
1706   PetscCall(PetscMalloc1(col * col * nc * nc, &cols));
1707 
1708   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1709 
1710   /* determine the matrix preallocation information */
1711   MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
1712   for (i = xs; i < xs + nx; i++) {
1713     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1714     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1715     for (j = ys; j < ys + ny; j++) {
1716       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1717       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1718       slot   = i - gxs + gnx * (j - gys);
1719 
1720       /* Find block columns in block row */
1721       cnt = 0;
1722       for (ii = istart; ii < iend + 1; ii++) {
1723         for (jj = jstart; jj < jend + 1; jj++) {
1724           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1725             cols[cnt++] = slot + ii + gnx * jj;
1726           }
1727         }
1728       }
1729       PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
1730     }
1731   }
1732   PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
1733   PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1734   MatPreallocateEnd(dnz, onz);
1735 
1736   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1737 
1738   /*
1739     For each node in the grid: we get the neighbors in the local (on processor ordering
1740     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1741     PETSc ordering.
1742   */
1743   if (!da->prealloc_only) {
1744     PetscCall(PetscCalloc1(col * col * nc * nc, &values));
1745     for (i = xs; i < xs + nx; i++) {
1746       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1747       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1748       for (j = ys; j < ys + ny; j++) {
1749         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1750         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1751         slot   = i - gxs + gnx * (j - gys);
1752         cnt    = 0;
1753         for (ii = istart; ii < iend + 1; ii++) {
1754           for (jj = jstart; jj < jend + 1; jj++) {
1755             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1756               cols[cnt++] = slot + ii + gnx * jj;
1757             }
1758           }
1759         }
1760         PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
1761       }
1762     }
1763     PetscCall(PetscFree(values));
1764     /* do not copy values to GPU since they are all zero and not yet needed there */
1765     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1766     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1767     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1768     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1769     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1770   }
1771   PetscCall(PetscFree(cols));
1772   PetscFunctionReturn(PETSC_SUCCESS);
1773 }
1774 
1775 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da, Mat J)
1776 {
1777   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1778   PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
1779   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
1780   MPI_Comm               comm;
1781   PetscScalar           *values;
1782   DMBoundaryType         bx, by, bz;
1783   DMDAStencilType        st;
1784   ISLocalToGlobalMapping ltog;
1785 
1786   PetscFunctionBegin;
1787   /*
1788          nc - number of components per grid point
1789          col - number of colors needed in one direction for single component problem
1790   */
1791   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
1792   col = 2 * s + 1;
1793 
1794   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
1795   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
1796   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1797 
1798   PetscCall(PetscMalloc1(col * col * col, &cols));
1799 
1800   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1801 
1802   /* determine the matrix preallocation information */
1803   MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
1804   for (i = xs; i < xs + nx; i++) {
1805     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1806     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1807     for (j = ys; j < ys + ny; j++) {
1808       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1809       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1810       for (k = zs; k < zs + nz; k++) {
1811         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1812         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1813 
1814         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1815 
1816         /* Find block columns in block row */
1817         cnt = 0;
1818         for (ii = istart; ii < iend + 1; ii++) {
1819           for (jj = jstart; jj < jend + 1; jj++) {
1820             for (kk = kstart; kk < kend + 1; kk++) {
1821               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1822                 cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
1823               }
1824             }
1825           }
1826         }
1827         PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
1828       }
1829     }
1830   }
1831   PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
1832   PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1833   MatPreallocateEnd(dnz, onz);
1834 
1835   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1836 
1837   /*
1838     For each node in the grid: we get the neighbors in the local (on processor ordering
1839     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1840     PETSc ordering.
1841   */
1842   if (!da->prealloc_only) {
1843     PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
1844     for (i = xs; i < xs + nx; i++) {
1845       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1846       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1847       for (j = ys; j < ys + ny; j++) {
1848         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1849         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1850         for (k = zs; k < zs + nz; k++) {
1851           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1852           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1853 
1854           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1855 
1856           cnt = 0;
1857           for (ii = istart; ii < iend + 1; ii++) {
1858             for (jj = jstart; jj < jend + 1; jj++) {
1859               for (kk = kstart; kk < kend + 1; kk++) {
1860                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1861                   cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
1862                 }
1863               }
1864             }
1865           }
1866           PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
1867         }
1868       }
1869     }
1870     PetscCall(PetscFree(values));
1871     /* do not copy values to GPU since they are all zero and not yet needed there */
1872     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1873     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1874     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1875     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1876     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1877   }
1878   PetscCall(PetscFree(cols));
1879   PetscFunctionReturn(PETSC_SUCCESS);
1880 }
1881 
1882 /*
1883   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
1884   identify in the local ordering with periodic domain.
1885 */
1886 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog, PetscInt *row, PetscInt *cnt, PetscInt col[])
1887 {
1888   PetscInt i, n;
1889 
1890   PetscFunctionBegin;
1891   PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, 1, row, row));
1892   PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, *cnt, col, col));
1893   for (i = 0, n = 0; i < *cnt; i++) {
1894     if (col[i] >= *row) col[n++] = col[i];
1895   }
1896   *cnt = n;
1897   PetscFunctionReturn(PETSC_SUCCESS);
1898 }
1899 
1900 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da, Mat J)
1901 {
1902   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1903   PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
1904   PetscInt               istart, iend, jstart, jend, ii, jj;
1905   MPI_Comm               comm;
1906   PetscScalar           *values;
1907   DMBoundaryType         bx, by;
1908   DMDAStencilType        st;
1909   ISLocalToGlobalMapping ltog;
1910 
1911   PetscFunctionBegin;
1912   /*
1913      nc - number of components per grid point
1914      col - number of colors needed in one direction for single component problem
1915   */
1916   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
1917   col = 2 * s + 1;
1918 
1919   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1920   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1921   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1922 
1923   PetscCall(PetscMalloc1(col * col * nc * nc, &cols));
1924 
1925   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1926 
1927   /* determine the matrix preallocation information */
1928   MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
1929   for (i = xs; i < xs + nx; i++) {
1930     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1931     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1932     for (j = ys; j < ys + ny; j++) {
1933       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1934       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1935       slot   = i - gxs + gnx * (j - gys);
1936 
1937       /* Find block columns in block row */
1938       cnt = 0;
1939       for (ii = istart; ii < iend + 1; ii++) {
1940         for (jj = jstart; jj < jend + 1; jj++) {
1941           if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
1942         }
1943       }
1944       PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
1945       PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
1946     }
1947   }
1948   PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
1949   PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1950   MatPreallocateEnd(dnz, onz);
1951 
1952   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1953 
1954   /*
1955     For each node in the grid: we get the neighbors in the local (on processor ordering
1956     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1957     PETSc ordering.
1958   */
1959   if (!da->prealloc_only) {
1960     PetscCall(PetscCalloc1(col * col * nc * nc, &values));
1961     for (i = xs; i < xs + nx; i++) {
1962       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1963       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1964       for (j = ys; j < ys + ny; j++) {
1965         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1966         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1967         slot   = i - gxs + gnx * (j - gys);
1968 
1969         /* Find block columns in block row */
1970         cnt = 0;
1971         for (ii = istart; ii < iend + 1; ii++) {
1972           for (jj = jstart; jj < jend + 1; jj++) {
1973             if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
1974           }
1975         }
1976         PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
1977         PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
1978       }
1979     }
1980     PetscCall(PetscFree(values));
1981     /* do not copy values to GPU since they are all zero and not yet needed there */
1982     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1983     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1984     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1985     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1986     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1987   }
1988   PetscCall(PetscFree(cols));
1989   PetscFunctionReturn(PETSC_SUCCESS);
1990 }
1991 
1992 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da, Mat J)
1993 {
1994   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1995   PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
1996   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
1997   MPI_Comm               comm;
1998   PetscScalar           *values;
1999   DMBoundaryType         bx, by, bz;
2000   DMDAStencilType        st;
2001   ISLocalToGlobalMapping ltog;
2002 
2003   PetscFunctionBegin;
2004   /*
2005      nc - number of components per grid point
2006      col - number of colors needed in one direction for single component problem
2007   */
2008   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
2009   col = 2 * s + 1;
2010 
2011   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
2012   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
2013   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
2014 
2015   /* create the matrix */
2016   PetscCall(PetscMalloc1(col * col * col, &cols));
2017 
2018   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
2019 
2020   /* determine the matrix preallocation information */
2021   MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
2022   for (i = xs; i < xs + nx; i++) {
2023     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2024     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2025     for (j = ys; j < ys + ny; j++) {
2026       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2027       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2028       for (k = zs; k < zs + nz; k++) {
2029         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2030         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2031 
2032         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2033 
2034         /* Find block columns in block row */
2035         cnt = 0;
2036         for (ii = istart; ii < iend + 1; ii++) {
2037           for (jj = jstart; jj < jend + 1; jj++) {
2038             for (kk = kstart; kk < kend + 1; kk++) {
2039               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
2040             }
2041           }
2042         }
2043         PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
2044         PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
2045       }
2046     }
2047   }
2048   PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
2049   PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
2050   MatPreallocateEnd(dnz, onz);
2051 
2052   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
2053 
2054   /*
2055     For each node in the grid: we get the neighbors in the local (on processor ordering
2056     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2057     PETSc ordering.
2058   */
2059   if (!da->prealloc_only) {
2060     PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
2061     for (i = xs; i < xs + nx; i++) {
2062       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2063       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2064       for (j = ys; j < ys + ny; j++) {
2065         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2066         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2067         for (k = zs; k < zs + nz; k++) {
2068           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2069           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2070 
2071           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2072 
2073           cnt = 0;
2074           for (ii = istart; ii < iend + 1; ii++) {
2075             for (jj = jstart; jj < jend + 1; jj++) {
2076               for (kk = kstart; kk < kend + 1; kk++) {
2077                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
2078               }
2079             }
2080           }
2081           PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
2082           PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
2083         }
2084       }
2085     }
2086     PetscCall(PetscFree(values));
2087     /* do not copy values to GPU since they are all zero and not yet needed there */
2088     PetscCall(MatBindToCPU(J, PETSC_TRUE));
2089     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
2090     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
2091     PetscCall(MatBindToCPU(J, PETSC_FALSE));
2092     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2093   }
2094   PetscCall(PetscFree(cols));
2095   PetscFunctionReturn(PETSC_SUCCESS);
2096 }
2097 
2098 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da, Mat J)
2099 {
2100   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
2101   PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, *dnz, *onz;
2102   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
2103   DM_DA                 *dd = (DM_DA *)da->data;
2104   PetscInt               ifill_col, *dfill = dd->dfill, *ofill = dd->ofill;
2105   MPI_Comm               comm;
2106   PetscScalar           *values;
2107   DMBoundaryType         bx, by, bz;
2108   ISLocalToGlobalMapping ltog;
2109   DMDAStencilType        st;
2110   PetscBool              removedups = PETSC_FALSE;
2111 
2112   PetscFunctionBegin;
2113   /*
2114          nc - number of components per grid point
2115          col - number of colors needed in one direction for single component problem
2116   */
2117   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
2118   col = 2 * s + 1;
2119 
2120   /*
2121        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
2122        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
2123   */
2124   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
2125   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
2126   if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;
2127 
2128   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
2129   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
2130   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
2131 
2132   PetscCall(PetscMalloc1(col * col * col * nc, &cols));
2133   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
2134 
2135   /* determine the matrix preallocation information */
2136   MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
2137 
2138   PetscCall(MatSetBlockSize(J, nc));
2139   for (i = xs; i < xs + nx; i++) {
2140     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2141     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2142     for (j = ys; j < ys + ny; j++) {
2143       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2144       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2145       for (k = zs; k < zs + nz; k++) {
2146         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2147         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2148 
2149         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2150 
2151         for (l = 0; l < nc; l++) {
2152           cnt = 0;
2153           for (ii = istart; ii < iend + 1; ii++) {
2154             for (jj = jstart; jj < jend + 1; jj++) {
2155               for (kk = kstart; kk < kend + 1; kk++) {
2156                 if (ii || jj || kk) {
2157                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
2158                     for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2159                   }
2160                 } else {
2161                   if (dfill) {
2162                     for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2163                   } else {
2164                     for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2165                   }
2166                 }
2167               }
2168             }
2169           }
2170           row    = l + nc * (slot);
2171           maxcnt = PetscMax(maxcnt, cnt);
2172           if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
2173           else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
2174         }
2175       }
2176     }
2177   }
2178   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
2179   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
2180   MatPreallocateEnd(dnz, onz);
2181   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
2182 
2183   /*
2184     For each node in the grid: we get the neighbors in the local (on processor ordering
2185     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2186     PETSc ordering.
2187   */
2188   if (!da->prealloc_only) {
2189     PetscCall(PetscCalloc1(maxcnt, &values));
2190     for (i = xs; i < xs + nx; i++) {
2191       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2192       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2193       for (j = ys; j < ys + ny; j++) {
2194         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2195         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2196         for (k = zs; k < zs + nz; k++) {
2197           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2198           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2199 
2200           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2201 
2202           for (l = 0; l < nc; l++) {
2203             cnt = 0;
2204             for (ii = istart; ii < iend + 1; ii++) {
2205               for (jj = jstart; jj < jend + 1; jj++) {
2206                 for (kk = kstart; kk < kend + 1; kk++) {
2207                   if (ii || jj || kk) {
2208                     if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
2209                       for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2210                     }
2211                   } else {
2212                     if (dfill) {
2213                       for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2214                     } else {
2215                       for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2216                     }
2217                   }
2218                 }
2219               }
2220             }
2221             row = l + nc * (slot);
2222             PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, values, INSERT_VALUES));
2223           }
2224         }
2225       }
2226     }
2227     PetscCall(PetscFree(values));
2228     /* do not copy values to GPU since they are all zero and not yet needed there */
2229     PetscCall(MatBindToCPU(J, PETSC_TRUE));
2230     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
2231     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
2232     PetscCall(MatBindToCPU(J, PETSC_FALSE));
2233     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2234   }
2235   PetscCall(PetscFree(cols));
2236   PetscFunctionReturn(PETSC_SUCCESS);
2237 }
2238