xref: /petsc/src/dm/impls/da/fdda.c (revision f97672e55eacc8688507b9471cd7ec2664d7f203)
1 
2 #include <petsc/private/dmdaimpl.h> /*I      "petscdmda.h"     I*/
3 #include <petscmat.h>
4 #include <petscbt.h>
5 
6 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring*);
7 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring*);
8 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring*);
9 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring*);
10 
/*
   For a ghost index i that may be negative or greater than the upper bound,
   this maps it into the 0:m-1 range using periodicity.

   Arguments are fully parenthesized so that expressions (e.g. i-1, 2*s) can be
   passed safely; note each argument may be evaluated more than once, so avoid
   arguments with side effects.
*/
#define SetInRange(i,m) (((i) < 0) ? (m)+(i) : (((i) >= (m)) ? (i)-(m) : (i)))
16 
17 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill,PetscInt w,PetscInt **rfill)
18 {
19   PetscInt       i,j,nz,*fill;
20 
21   PetscFunctionBegin;
22   if (!dfill) PetscFunctionReturn(0);
23 
24   /* count number nonzeros */
25   nz = 0;
26   for (i=0; i<w; i++) {
27     for (j=0; j<w; j++) {
28       if (dfill[w*i+j]) nz++;
29     }
30   }
31   PetscCall(PetscMalloc1(nz + w + 1,&fill));
32   /* construct modified CSR storage of nonzero structure */
33   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
34    so fill[1] - fill[0] gives number of nonzeros in first row etc */
35   nz = w + 1;
36   for (i=0; i<w; i++) {
37     fill[i] = nz;
38     for (j=0; j<w; j++) {
39       if (dfill[w*i+j]) {
40         fill[nz] = j;
41         nz++;
42       }
43     }
44   }
45   fill[w] = nz;
46 
47   *rfill = fill;
48   PetscFunctionReturn(0);
49 }
50 
51 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse,PetscInt w,PetscInt **rfill)
52 {
53   PetscInt       nz;
54 
55   PetscFunctionBegin;
56   if (!dfillsparse) PetscFunctionReturn(0);
57 
58   /* Determine number of non-zeros */
59   nz = (dfillsparse[w] - w - 1);
60 
61   /* Allocate space for our copy of the given sparse matrix representation. */
62   PetscCall(PetscMalloc1(nz + w + 1,rfill));
63   PetscCall(PetscArraycpy(*rfill,dfillsparse,nz+w+1));
64   PetscFunctionReturn(0);
65 }
66 
67 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
68 {
69   PetscInt       i,k,cnt = 1;
70 
71   PetscFunctionBegin;
72 
73   /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of
74    columns to the left with any nonzeros in them plus 1 */
75   PetscCall(PetscCalloc1(dd->w,&dd->ofillcols));
76   for (i=0; i<dd->w; i++) {
77     for (k=dd->ofill[i]; k<dd->ofill[i+1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
78   }
79   for (i=0; i<dd->w; i++) {
80     if (dd->ofillcols[i]) {
81       dd->ofillcols[i] = cnt++;
82     }
83   }
84   PetscFunctionReturn(0);
85 }
86 
87 /*@
88     DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
89     of the matrix returned by DMCreateMatrix().
90 
91     Logically Collective on da
92 
93     Input Parameters:
94 +   da - the distributed array
95 .   dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
96 -   ofill - the fill pattern in the off-diagonal blocks
97 
98     Level: developer
99 
100     Notes:
101     This only makes sense when you are doing multicomponent problems but using the
102        MPIAIJ matrix format
103 
104            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
105        representing coupling and 0 entries for missing coupling. For example
106 $             dfill[9] = {1, 0, 0,
107 $                         1, 1, 0,
108 $                         0, 1, 1}
109        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
110        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
111        diagonal block).
112 
113      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns than
114      can be represented in the dfill, ofill format
115 
116    Contributed by Glenn Hammond
117 
118 .seealso `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
119 
120 @*/
121 PetscErrorCode  DMDASetBlockFills(DM da,const PetscInt *dfill,const PetscInt *ofill)
122 {
123   DM_DA          *dd = (DM_DA*)da->data;
124 
125   PetscFunctionBegin;
126   /* save the given dfill and ofill information */
127   PetscCall(DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill));
128   PetscCall(DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill));
129 
130   /* count nonzeros in ofill columns */
131   PetscCall(DMDASetBlockFills_Private2(dd));
132 
133   PetscFunctionReturn(0);
134 }
135 
136 /*@
137     DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
138     of the matrix returned by DMCreateMatrix(), using sparse representations
139     of fill patterns.
140 
141     Logically Collective on da
142 
143     Input Parameters:
144 +   da - the distributed array
145 .   dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block)
146 -   ofill - the sparse fill pattern in the off-diagonal blocks
147 
148     Level: developer
149 
150     Notes: This only makes sense when you are doing multicomponent problems but using the
151        MPIAIJ matrix format
152 
153            The format for dfill and ofill is a sparse representation of a
154            dof-by-dof matrix with 1 entries representing coupling and 0 entries
155            for missing coupling.  The sparse representation is a 1 dimensional
156            array of length nz + dof + 1, where nz is the number of non-zeros in
157            the matrix.  The first dof entries in the array give the
158            starting array indices of each row's items in the rest of the array,
159            the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
160            and the remaining nz items give the column indices of each of
161            the 1s within the logical 2D matrix.  Each row's items within
162            the array are the column indices of the 1s within that row
163            of the 2D matrix.  PETSc developers may recognize that this is the
164            same format as that computed by the DMDASetBlockFills_Private()
165            function from a dense 2D matrix representation.
166 
167      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns than
168      can be represented in the dfill, ofill format
169 
170    Contributed by Philip C. Roth
171 
172 .seealso `DMDASetBlockFills()`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
173 
174 @*/
175 PetscErrorCode  DMDASetBlockFillsSparse(DM da,const PetscInt *dfillsparse,const PetscInt *ofillsparse)
176 {
177   DM_DA          *dd = (DM_DA*)da->data;
178 
179   PetscFunctionBegin;
180   /* save the given dfill and ofill information */
181   PetscCall(DMDASetBlockFillsSparse_Private(dfillsparse,dd->w,&dd->dfill));
182   PetscCall(DMDASetBlockFillsSparse_Private(ofillsparse,dd->w,&dd->ofill));
183 
184   /* count nonzeros in ofill columns */
185   PetscCall(DMDASetBlockFills_Private2(dd));
186 
187   PetscFunctionReturn(0);
188 }
189 
/*
   DMCreateColoring_DA - dimension dispatcher for DMDA matrix colorings.

   Validates/adjusts the requested coloring type, temporarily rescales the
   DMDA's x-extents for BAIJ (block) matrices so the coloring is computed per
   block rather than per scalar entry, then calls the 1d/2d/3d worker.
*/
PetscErrorCode  DMCreateColoring_DA(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscInt         dim,m,n,p,nc;
  DMBoundaryType   bx,by,bz;
  MPI_Comm         comm;
  PetscMPIInt      size;
  PetscBool        isBAIJ;
  DM_DA            *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
                                  m
          ------------------------------------------------------
         |                                                     |
         |                                                     |
         |               ----------------------                |
         |               |                    |                |
      n  |           yn  |                    |                |
         |               |                    |                |
         |               .---------------------                |
         |             (xs,ys)     xn                          |
         |            .                                        |
         |         (gxs,gys)                                   |
         |                                                     |
          -----------------------------------------------------
  */

  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  PetscCall(DMDAGetInfo(da,&dim,NULL,NULL,NULL,&m,&n,&p,&nc,NULL,&bx,&by,&bz,NULL));

  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  if (ctype == IS_COLORING_LOCAL) {
    /* on one process a ghosted (local) coloring is the same as a global one */
    if (size == 1) {
      ctype = IS_COLORING_GLOBAL;
    } else if (dim > 1) {
      /* a single-cell periodic direction would put both ends of the domain on
         the same process, which the local coloring cannot represent */
      if ((m==1 && bx == DM_BOUNDARY_PERIODIC) || (n==1 && by == DM_BOUNDARY_PERIODIC) || (p==1 && bz == DM_BOUNDARY_PERIODIC)) {
        SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain  on the same process");
      }
    }
  }

  /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
     matrices is for the blocks, not the individual matrix elements  */
  PetscCall(PetscStrbeginswith(da->mattype,MATBAIJ,&isBAIJ));
  if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype,MATMPIBAIJ,&isBAIJ));
  if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype,MATSEQBAIJ,&isBAIJ));
  if (isBAIJ) {
    /* temporarily rescale the x-direction extents to block units; undone below */
    dd->w  = 1;
    dd->xs = dd->xs/nc;
    dd->xe = dd->xe/nc;
    dd->Xs = dd->Xs/nc;
    dd->Xe = dd->Xe/nc;
  }

  /*
     We do not provide a getcoloring function in the DMDA operations because
   the basic DMDA does not know about matrices. We think of DMDA as being
   more low-level than matrices.
  */
  if (dim == 1) {
    PetscCall(DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring));
  } else if (dim == 2) {
    PetscCall(DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring));
  } else if (dim == 3) {
    PetscCall(DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring));
  } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not done for %" PetscInt_FMT " dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
  if (isBAIJ) {
    /* restore the scalar (per-entry) extents modified above */
    dd->w  = nc;
    dd->xs = dd->xs*nc;
    dd->xe = dd->xe*nc;
    dd->Xs = dd->Xs*nc;
    dd->Xe = dd->Xe*nc;
  }
  PetscFunctionReturn(0);
}
270 
271 /* ---------------------------------------------------------------------------------*/
272 
273 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
274 {
275   PetscInt        xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
276   PetscInt        ncolors;
277   MPI_Comm        comm;
278   DMBoundaryType  bx,by;
279   DMDAStencilType st;
280   ISColoringValue *colors;
281   DM_DA           *dd = (DM_DA*)da->data;
282 
283   PetscFunctionBegin;
284   /*
285          nc - number of components per grid point
286          col - number of colors needed in one direction for single component problem
287 
288   */
289   PetscCall(DMDAGetInfo(da,&dim,&m,&n,NULL,&M,&N,NULL,&nc,&s,&bx,&by,NULL,&st));
290   col  = 2*s + 1;
291   PetscCall(DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL));
292   PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL));
293   PetscCall(PetscObjectGetComm((PetscObject)da,&comm));
294 
295   /* special case as taught to us by Paul Hovland */
296   if (st == DMDA_STENCIL_STAR && s == 1) {
297     PetscCall(DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring));
298   } else {
299     if (ctype == IS_COLORING_GLOBAL) {
300       if (!dd->localcoloring) {
301         PetscCall(PetscMalloc1(nc*nx*ny,&colors));
302         ii   = 0;
303         for (j=ys; j<ys+ny; j++) {
304           for (i=xs; i<xs+nx; i++) {
305             for (k=0; k<nc; k++) {
306               colors[ii++] = k + nc*((i % col) + col*(j % col));
307             }
308           }
309         }
310         ncolors = nc + nc*(col-1 + col*(col-1));
311         PetscCall(ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring));
312       }
313       *coloring = dd->localcoloring;
314     } else if (ctype == IS_COLORING_LOCAL) {
315       if (!dd->ghostedcoloring) {
316         PetscCall(PetscMalloc1(nc*gnx*gny,&colors));
317         ii   = 0;
318         for (j=gys; j<gys+gny; j++) {
319           for (i=gxs; i<gxs+gnx; i++) {
320             for (k=0; k<nc; k++) {
321               /* the complicated stuff is to handle periodic boundaries */
322               colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
323             }
324           }
325         }
326         ncolors = nc + nc*(col - 1 + col*(col-1));
327         PetscCall(ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring));
328         /* PetscIntView(ncolors,(PetscInt*)colors,0); */
329 
330         PetscCall(ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL));
331       }
332       *coloring = dd->ghostedcoloring;
333     } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
334   }
335   PetscCall(ISColoringReference(*coloring));
336   PetscFunctionReturn(0);
337 }
338 
339 /* ---------------------------------------------------------------------------------*/
340 
/*
   DMCreateColoring_DA_3d_MPIAIJ - coloring for a 3d DMDA: colors repeat with
   period col = 2*s+1 in each of the three directions, nc components per
   grid point. Computed colorings are cached on the DMDA and reused.
*/
PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscInt        xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
  PetscInt        ncolors;
  MPI_Comm        comm;
  DMBoundaryType  bx,by,bz;
  DMDAStencilType st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  PetscCall(DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st));
  col  = 2*s + 1;
  PetscCall(DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    /* coloring of the locally owned portion only */
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc*nx*ny*nz,&colors));
      ii   = 0;
      for (k=zs; k<zs+nz; k++) {
        for (j=ys; j<ys+ny; j++) {
          for (i=xs; i<xs+nx; i++) {
            for (l=0; l<nc; l++) {
              /* color = component + position within the col x col x col tile */
              colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
            }
          }
        }
      }
      ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
      PetscCall(ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,PETSC_OWN_POINTER,&dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    /* coloring of the ghosted (local) portion */
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc*gnx*gny*gnz,&colors));
      ii   = 0;
      for (k=gzs; k<gzs+gnz; k++) {
        for (j=gys; j<gys+gny; j++) {
          for (i=gxs; i<gxs+gnx; i++) {
            for (l=0; l<nc; l++) {
              /* the complicated stuff is to handle periodic boundaries:
                 SetInRange() maps ghost indices back into the domain */
              colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
            }
          }
        }
      }
      ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
      PetscCall(ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(0);
}
404 
405 /* ---------------------------------------------------------------------------------*/
406 
/*
   DMCreateColoring_DA_1d_MPIAIJ - coloring for a 1d DMDA with nc components
   per grid point and stencil width s (col = 2*s+1 colors per component in
   the general case). Computed colorings are cached on the DMDA and reused.
*/
PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscInt        xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
  PetscInt        ncolors;
  MPI_Comm        comm;
  DMBoundaryType  bx;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  PetscCall(DMDAGetInfo(da,&dim,&m,NULL,NULL,&M,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL));
  col  = 2*s + 1;
  PetscCall(DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL));
  PetscCall(DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc*nx,&colors));
      if (dd->ofillcols) {
        /* a block fill pattern was set via DMDASetBlockFills(): only the
           components with off-diagonal coupling need distinct colors at
           neighboring points, allowing fewer total colors */
        PetscInt tc = 0;
        /* tc = number of components with any off-diagonal coupling */
        for (i=0; i<nc; i++) tc += (PetscInt) (dd->ofillcols[i] > 0);
        i1 = 0;
        for (i=xs; i<xs+nx; i++) {
          for (l=0; l<nc; l++) {
            if (dd->ofillcols[l] && (i % col)) {
              /* coupled component away from the tile origin: use a color from
                 the compressed range past the first nc colors; ofillcols[l]
                 is the 1-based compressed column index of component l */
              colors[i1++] =  nc - 1 + tc*((i % col) - 1) + dd->ofillcols[l];
            } else {
              /* uncoupled component (or tile origin): color by component only */
              colors[i1++] = l;
            }
          }
        }
        ncolors = nc + 2*s*tc;
      } else {
        /* dense coupling: color = component + nc * position within the tile */
        i1 = 0;
        for (i=xs; i<xs+nx; i++) {
          for (l=0; l<nc; l++) {
            colors[i1++] = l + nc*(i % col);
          }
        }
        ncolors = nc + nc*(col-1);
      }
      PetscCall(ISColoringCreate(comm,ncolors,nc*nx,colors,PETSC_OWN_POINTER,&dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc*gnx,&colors));
      i1   = 0;
      for (i=gxs; i<gxs+gnx; i++) {
        for (l=0; l<nc; l++) {
          /* the complicated stuff is to handle periodic boundaries:
             SetInRange() maps ghost indices back into the domain */
          colors[i1++] = l + nc*(SetInRange(i,m) % col);
        }
      }
      ncolors = nc + nc*(col-1);
      PetscCall(ISColoringCreate(comm,ncolors,nc*gnx,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(0);
}
477 
478 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
479 {
480   PetscInt        xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
481   PetscInt        ncolors;
482   MPI_Comm        comm;
483   DMBoundaryType  bx,by;
484   ISColoringValue *colors;
485   DM_DA           *dd = (DM_DA*)da->data;
486 
487   PetscFunctionBegin;
488   /*
489          nc - number of components per grid point
490          col - number of colors needed in one direction for single component problem
491 
492   */
493   PetscCall(DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,NULL));
494   PetscCall(DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL));
495   PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL));
496   PetscCall(PetscObjectGetComm((PetscObject)da,&comm));
497   /* create the coloring */
498   if (ctype == IS_COLORING_GLOBAL) {
499     if (!dd->localcoloring) {
500       PetscCall(PetscMalloc1(nc*nx*ny,&colors));
501       ii   = 0;
502       for (j=ys; j<ys+ny; j++) {
503         for (i=xs; i<xs+nx; i++) {
504           for (k=0; k<nc; k++) {
505             colors[ii++] = k + nc*((3*j+i) % 5);
506           }
507         }
508       }
509       ncolors = 5*nc;
510       PetscCall(ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring));
511     }
512     *coloring = dd->localcoloring;
513   } else if (ctype == IS_COLORING_LOCAL) {
514     if (!dd->ghostedcoloring) {
515       PetscCall(PetscMalloc1(nc*gnx*gny,&colors));
516       ii = 0;
517       for (j=gys; j<gys+gny; j++) {
518         for (i=gxs; i<gxs+gnx; i++) {
519           for (k=0; k<nc; k++) {
520             colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
521           }
522         }
523       }
524       ncolors = 5*nc;
525       PetscCall(ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring));
526       PetscCall(ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL));
527     }
528     *coloring = dd->ghostedcoloring;
529   } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
530   PetscFunctionReturn(0);
531 }
532 
533 /* =========================================================================== */
534 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat);
535 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM,Mat);
536 extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM,Mat);
537 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat);
538 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat);
539 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat);
540 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat);
541 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat);
542 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat);
543 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat);
544 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat);
545 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM,Mat);
546 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM,Mat);
547 extern PetscErrorCode DMCreateMatrix_DA_IS(DM,Mat);
548 
549 /*@C
550    MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix
551 
552    Logically Collective on mat
553 
554    Input Parameters:
555 +  mat - the matrix
556 -  da - the da
557 
558    Level: intermediate
559 
560 @*/
PetscErrorCode MatSetupDM(Mat mat,DM da)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  PetscValidHeaderSpecificType(da,DM_CLASSID,2,DMDA);
  /* dispatch to the matrix type's "MatSetupDM_C" implementation if it
     provides one; silently a no-op for matrix types without this method */
  PetscTryMethod(mat,"MatSetupDM_C",(Mat,DM),(mat,da));
  PetscFunctionReturn(0);
}
569 
570 PetscErrorCode  MatView_MPI_DA(Mat A,PetscViewer viewer)
571 {
572   DM                da;
573   const char        *prefix;
574   Mat               Anatural;
575   AO                ao;
576   PetscInt          rstart,rend,*petsc,i;
577   IS                is;
578   MPI_Comm          comm;
579   PetscViewerFormat format;
580 
581   PetscFunctionBegin;
582   /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
583   PetscCall(PetscViewerGetFormat(viewer,&format));
584   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0);
585 
586   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
587   PetscCall(MatGetDM(A, &da));
588   PetscCheck(da,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
589 
590   PetscCall(DMDAGetAO(da,&ao));
591   PetscCall(MatGetOwnershipRange(A,&rstart,&rend));
592   PetscCall(PetscMalloc1(rend-rstart,&petsc));
593   for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
594   PetscCall(AOApplicationToPetsc(ao,rend-rstart,petsc));
595   PetscCall(ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is));
596 
597   /* call viewer on natural ordering */
598   PetscCall(MatCreateSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural));
599   PetscCall(ISDestroy(&is));
600   PetscCall(PetscObjectGetOptionsPrefix((PetscObject)A,&prefix));
601   PetscCall(PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix));
602   PetscCall(PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name));
603   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
604   PetscCall(MatView(Anatural,viewer));
605   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
606   PetscCall(MatDestroy(&Anatural));
607   PetscFunctionReturn(0);
608 }
609 
610 PetscErrorCode  MatLoad_MPI_DA(Mat A,PetscViewer viewer)
611 {
612   DM             da;
613   Mat            Anatural,Aapp;
614   AO             ao;
615   PetscInt       rstart,rend,*app,i,m,n,M,N;
616   IS             is;
617   MPI_Comm       comm;
618 
619   PetscFunctionBegin;
620   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
621   PetscCall(MatGetDM(A, &da));
622   PetscCheck(da,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
623 
624   /* Load the matrix in natural ordering */
625   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&Anatural));
626   PetscCall(MatSetType(Anatural,((PetscObject)A)->type_name));
627   PetscCall(MatGetSize(A,&M,&N));
628   PetscCall(MatGetLocalSize(A,&m,&n));
629   PetscCall(MatSetSizes(Anatural,m,n,M,N));
630   PetscCall(MatLoad(Anatural,viewer));
631 
632   /* Map natural ordering to application ordering and create IS */
633   PetscCall(DMDAGetAO(da,&ao));
634   PetscCall(MatGetOwnershipRange(Anatural,&rstart,&rend));
635   PetscCall(PetscMalloc1(rend-rstart,&app));
636   for (i=rstart; i<rend; i++) app[i-rstart] = i;
637   PetscCall(AOPetscToApplication(ao,rend-rstart,app));
638   PetscCall(ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is));
639 
640   /* Do permutation and replace header */
641   PetscCall(MatCreateSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp));
642   PetscCall(MatHeaderReplace(A,&Aapp));
643   PetscCall(ISDestroy(&is));
644   PetscCall(MatDestroy(&Anatural));
645   PetscFunctionReturn(0);
646 }
647 
648 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
649 {
650   PetscInt       dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
651   Mat            A;
652   MPI_Comm       comm;
653   MatType        Atype;
654   void           (*aij)(void)=NULL,(*baij)(void)=NULL,(*sbaij)(void)=NULL,(*sell)(void)=NULL,(*is)(void)=NULL;
655   MatType        mtype;
656   PetscMPIInt    size;
657   DM_DA          *dd = (DM_DA*)da->data;
658 
659   PetscFunctionBegin;
660   PetscCall(MatInitializePackage());
661   mtype = da->mattype;
662 
663   /*
664                                   m
665           ------------------------------------------------------
666          |                                                     |
667          |                                                     |
668          |               ----------------------                |
669          |               |                    |                |
670       n  |           ny  |                    |                |
671          |               |                    |                |
672          |               .---------------------                |
673          |             (xs,ys)     nx                          |
674          |            .                                        |
675          |         (gxs,gys)                                   |
676          |                                                     |
677           -----------------------------------------------------
678   */
679 
680   /*
681          nc - number of components per grid point
682          col - number of colors needed in one direction for single component problem
683 
684   */
685   M   = dd->M;
686   N   = dd->N;
687   P   = dd->P;
688   dim = da->dim;
689   dof = dd->w;
690   /* PetscCall(DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); */
691   PetscCall(DMDAGetCorners(da,NULL,NULL,NULL,&nx,&ny,&nz));
692   PetscCall(PetscObjectGetComm((PetscObject)da,&comm));
693   PetscCall(MatCreate(comm,&A));
694   PetscCall(MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P));
695   PetscCall(MatSetType(A,mtype));
696   PetscCall(MatSetFromOptions(A));
697   if (dof*nx*ny*nz < da->bind_below) {
698     PetscCall(MatSetBindingPropagates(A,PETSC_TRUE));
699     PetscCall(MatBindToCPU(A,PETSC_TRUE));
700   }
701   PetscCall(MatSetDM(A,da));
702   if (da->structure_only) {
703     PetscCall(MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE));
704   }
705   PetscCall(MatGetType(A,&Atype));
706   /*
707      We do not provide a getmatrix function in the DMDA operations because
708    the basic DMDA does not know about matrices. We think of DMDA as being more
709    more low-level than matrices. This is kind of cheating but, cause sometimes
710    we think of DMDA has higher level than matrices.
711 
712      We could switch based on Atype (or mtype), but we do not since the
713    specialized setting routines depend only on the particular preallocation
714    details of the matrix, not the type itself.
715   */
716   PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij));
717   if (!aij) {
718     PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij));
719   }
720   if (!aij) {
721     PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij));
722     if (!baij) {
723       PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij));
724     }
725     if (!baij) {
726       PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij));
727       if (!sbaij) {
728         PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij));
729       }
730       if (!sbaij) {
731         PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPISELLSetPreallocation_C",&sell));
732         if (!sell) {
733           PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",&sell));
734         }
735       }
736       if (!sell) {
737         PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatISSetPreallocation_C",&is));
738       }
739     }
740   }
741   if (aij) {
742     if (dim == 1) {
743       if (dd->ofill) {
744         PetscCall(DMCreateMatrix_DA_1d_MPIAIJ_Fill(da,A));
745       } else {
746         DMBoundaryType bx;
747         PetscMPIInt  size;
748         PetscCall(DMDAGetInfo(da,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&bx,NULL,NULL,NULL));
749         PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da),&size));
750         if (size == 1 && bx == DM_BOUNDARY_NONE) {
751           PetscCall(DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da,A));
752         } else {
753           PetscCall(DMCreateMatrix_DA_1d_MPIAIJ(da,A));
754         }
755       }
756     } else if (dim == 2) {
757       if (dd->ofill) {
758         PetscCall(DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A));
759       } else {
760         PetscCall(DMCreateMatrix_DA_2d_MPIAIJ(da,A));
761       }
762     } else if (dim == 3) {
763       if (dd->ofill) {
764         PetscCall(DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A));
765       } else {
766         PetscCall(DMCreateMatrix_DA_3d_MPIAIJ(da,A));
767       }
768     }
769   } else if (baij) {
770     if (dim == 2) {
771       PetscCall(DMCreateMatrix_DA_2d_MPIBAIJ(da,A));
772     } else if (dim == 3) {
773       PetscCall(DMCreateMatrix_DA_3d_MPIBAIJ(da,A));
774     } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
775   } else if (sbaij) {
776     if (dim == 2) {
777       PetscCall(DMCreateMatrix_DA_2d_MPISBAIJ(da,A));
778     } else if (dim == 3) {
779       PetscCall(DMCreateMatrix_DA_3d_MPISBAIJ(da,A));
780     } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
781   } else if (sell) {
782      if (dim == 2) {
783        PetscCall(DMCreateMatrix_DA_2d_MPISELL(da,A));
784      } else if (dim == 3) {
785        PetscCall(DMCreateMatrix_DA_3d_MPISELL(da,A));
786      } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
787   } else if (is) {
788     PetscCall(DMCreateMatrix_DA_IS(da,A));
789   } else {
790     ISLocalToGlobalMapping ltog;
791 
792     PetscCall(MatSetBlockSize(A,dof));
793     PetscCall(MatSetUp(A));
794     PetscCall(DMGetLocalToGlobalMapping(da,&ltog));
795     PetscCall(MatSetLocalToGlobalMapping(A,ltog,ltog));
796   }
797   PetscCall(DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]));
798   PetscCall(MatSetStencil(A,dim,dims,starts,dof));
799   PetscCall(MatSetDM(A,da));
800   PetscCallMPI(MPI_Comm_size(comm,&size));
801   if (size > 1) {
802     /* change viewer to display matrix in natural ordering */
803     PetscCall(MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA));
804     PetscCall(MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA));
805   }
806   *J = A;
807   PetscFunctionReturn(0);
808 }
809 
810 /* ---------------------------------------------------------------------------------*/
811 PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
812 
/*
  DMCreateMatrix_DA_IS - Sets up the local-to-global mapping and the preallocation of a
  MATIS matrix J created from the DMDA dm.

  The mapping is built from the element decomposition returned by DMDAGetElements():
  global indices of ghosted nodes that appear in no local element are masked to -1 so
  they are excluded from the MATIS subdomain. Unless dm->prealloc_skip is set, a
  temporary MATPREALLOCATOR matrix fed with the element connectivity is used to
  preallocate the local matrix of J.
*/
PetscErrorCode DMCreateMatrix_DA_IS(DM dm,Mat J)
{
  DM_DA                  *da = (DM_DA*)dm->data;
  Mat                    lJ,P;   /* lJ: local matrix of the MATIS; P: temporary MATPREALLOCATOR */
  ISLocalToGlobalMapping ltog;
  IS                     is;
  PetscBT                bt;     /* one bit per block (node): set if the node appears in a local element */
  const PetscInt         *e_loc,*idx;
  PetscInt               i,nel,nen,nv,dof,*gidx,n,N;

  /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
     We need to filter out the local indices that are not represented through the DMDAGetElements decomposition */
  PetscFunctionBegin;
  dof  = da->w;
  PetscCall(MatSetBlockSize(J,dof));
  PetscCall(DMGetLocalToGlobalMapping(dm,&ltog));

  /* flag local elements indices in local DMDA numbering */
  PetscCall(ISLocalToGlobalMappingGetSize(ltog,&nv)); /* nv counts individual dofs; nv/dof is the number of nodes */
  PetscCall(PetscBTCreate(nv/dof,&bt));
  PetscCall(DMDAGetElements(dm,&nel,&nen,&e_loc)); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
  for (i=0;i<nel*nen;i++) PetscCall(PetscBTSet(bt,e_loc[i]));

  /* filter out (set to -1) the global indices not used by the local elements */
  PetscCall(PetscMalloc1(nv/dof,&gidx));
  PetscCall(ISLocalToGlobalMappingGetBlockIndices(ltog,&idx));
  PetscCall(PetscArraycpy(gidx,idx,nv/dof));
  PetscCall(ISLocalToGlobalMappingRestoreBlockIndices(ltog,&idx));
  for (i=0;i<nv/dof;i++) if (!PetscBTLookup(bt,i)) gidx[i] = -1;
  PetscCall(PetscBTDestroy(&bt));
  /* gidx ownership passes to the IS (PETSC_OWN_POINTER): it must not be freed here */
  PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nv/dof,gidx,PETSC_OWN_POINTER,&is));
  PetscCall(ISLocalToGlobalMappingCreateIS(is,&ltog));
  PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));
  PetscCall(ISLocalToGlobalMappingDestroy(&ltog));
  PetscCall(ISDestroy(&is));

  /* Preallocation */
  if (dm->prealloc_skip) {
    PetscCall(MatSetUp(J));
  } else {
    /* build a MATPREALLOCATOR with the same shape/mapping as the local matrix, insert the
       element connectivity (values not needed), then transfer the pattern to lJ */
    PetscCall(MatISGetLocalMat(J,&lJ));
    PetscCall(MatGetLocalToGlobalMapping(lJ,&ltog,NULL));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)lJ),&P));
    PetscCall(MatSetType(P,MATPREALLOCATOR));
    PetscCall(MatSetLocalToGlobalMapping(P,ltog,ltog));
    PetscCall(MatGetSize(lJ,&N,NULL));
    PetscCall(MatGetLocalSize(lJ,&n,NULL));
    PetscCall(MatSetSizes(P,n,n,N,N));
    PetscCall(MatSetBlockSize(P,dof));
    PetscCall(MatSetUp(P));
    for (i=0;i<nel;i++) {
      /* every node of an element couples with every other node of that element */
      PetscCall(MatSetValuesBlockedLocal(P,nen,e_loc+i*nen,nen,e_loc+i*nen,NULL,INSERT_VALUES));
    }
    PetscCall(MatPreallocatorPreallocate(P,(PetscBool)!da->prealloc_only,lJ));
    PetscCall(MatISRestoreLocalMat(J,&lJ));
    PetscCall(DMDARestoreElements(dm,&nel,&nen,&e_loc));
    PetscCall(MatDestroy(&P));

    PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
  }
  PetscFunctionReturn(0);
}
876 
/*
  DMCreateMatrix_DA_2d_MPISELL - Preallocates (and, unless da->prealloc_only is set,
  inserts explicit zeros into) a SELL-format matrix J for a 2d DMDA stencil.

  Two structurally identical sweeps over the owned grid points are made: the first
  counts the diagonal/off-diagonal nonzeros per row for preallocation, the second
  inserts zeros so the nonzero pattern is fully assembled.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da,Mat J)
{
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p;
  PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  PetscCall(DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st));
  col  = 2*s + 1;                        /* stencil width in one direction */
  PetscCall(DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  /* rows holds the nc row indices of one grid point; cols holds at most col*col*nc*nc column indices */
  PetscCall(PetscMalloc2(nc,&rows,col*col*nc*nc,&cols));
  PetscCall(DMGetLocalToGlobalMapping(da,&ltog));

  PetscCall(MatSetBlockSize(J,nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm,nc*nx*ny,nc*nx*ny,dnz,onz);
  for (i=xs; i<xs+nx; i++) {

    /* stencil offsets in x, clipped to the physical domain unless periodic */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

    for (j=ys; j<ys+ny; j++) {
      slot = i - gxs + gnx*(j - gys);    /* local (ghosted) natural index of this grid point */

      /* stencil offsets in y, clipped to the physical domain unless periodic */
      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

      cnt = 0;
      for (k=0; k<nc; k++) {
        for (l=lstart; l<lend+1; l++) {
          for (p=pstart; p<pend+1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc*(slot + gnx*l + p);
            }
          }
        }
        rows[k] = k + nc*(slot);
      }
      PetscCall(MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz));
    }
  }
  PetscCall(MatSetBlockSize(J,nc));
  /* only one of the two calls below acts, depending on whether J is sequential or parallel */
  PetscCall(MatSeqSELLSetPreallocation(J,0,dnz));
  PetscCall(MatMPISELLSetPreallocation(J,0,dnz,0,onz));
  MatPreallocateEnd(dnz,onz);

  PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* values is zero-initialized: we only insert zeros to establish the nonzero pattern */
    PetscCall(PetscCalloc1(col*col*nc*nc,&values));
    for (i=xs; i<xs+nx; i++) {

      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

      for (j=ys; j<ys+ny; j++) {
        slot = i - gxs + gnx*(j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

        cnt = 0;
        for (k=0; k<nc; k++) {
          for (l=lstart; l<lend+1; l++) {
            for (p=pstart; p<pend+1; p++) {
              if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
                cols[cnt++] = k + nc*(slot + gnx*l + p);
              }
            }
          }
          rows[k] = k + nc*(slot);
        }
        PetscCall(MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J,PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J,PETSC_FALSE));
    PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows,cols));
  PetscFunctionReturn(0);
}
980 
/*
  DMCreateMatrix_DA_3d_MPISELL - Preallocates (and, unless da->prealloc_only is set,
  inserts explicit zeros into) a SELL-format matrix J for a 3d DMDA stencil.

  Mirrors DMCreateMatrix_DA_2d_MPISELL with a third (k/kk) grid direction: one sweep
  for preallocation counts, one sweep inserting zeros to assemble the pattern.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da,Mat J)
{
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
  PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by,bz;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  PetscCall(DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st));
  col  = 2*s + 1;                        /* stencil width in one direction */
  PetscCall(DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  /* rows holds the nc row indices of one grid point; cols holds at most col^3*nc*nc column indices */
  PetscCall(PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols));
  PetscCall(DMGetLocalToGlobalMapping(da,&ltog));

  PetscCall(MatSetBlockSize(J,nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);
  for (i=xs; i<xs+nx; i++) {
    /* stencil offsets per direction, clipped to the physical domain unless periodic */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
      for (k=zs; k<zs+nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

        slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);  /* local (ghosted) natural index */

        cnt = 0;
        for (l=0; l<nc; l++) {
          for (ii=istart; ii<iend+1; ii++) {
            for (jj=jstart; jj<jend+1; jj++) {
              for (kk=kstart; kk<kend+1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                  cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                }
              }
            }
          }
          rows[l] = l + nc*(slot);
        }
        PetscCall(MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz));
      }
    }
  }
  PetscCall(MatSetBlockSize(J,nc));
  /* only one of the two calls below acts, depending on whether J is sequential or parallel */
  PetscCall(MatSeqSELLSetPreallocation(J,0,dnz));
  PetscCall(MatMPISELLSetPreallocation(J,0,dnz,0,onz));
  MatPreallocateEnd(dnz,onz);
  PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* values is zero-initialized: we only insert zeros to establish the nonzero pattern */
    PetscCall(PetscCalloc1(col*col*col*nc*nc*nc,&values));
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
        for (k=zs; k<zs+nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

          slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

          cnt = 0;
          for (l=0; l<nc; l++) {
            for (ii=istart; ii<iend+1; ii++) {
              for (jj=jstart; jj<jend+1; jj++) {
                for (kk=kstart; kk<kend+1; kk++) {
                  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                    cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                  }
                }
              }
            }
            rows[l] = l + nc*(slot);
          }
          PetscCall(MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J,PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J,PETSC_FALSE));
    PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows,cols));
  PetscFunctionReturn(0);
}
1092 
/*
  DMCreateMatrix_DA_2d_MPIAIJ - Preallocates (and, unless da->prealloc_only is set,
  assembles the zero nonzero-pattern of) an AIJ matrix J for a 2d DMDA stencil.

  First sweep over the owned grid points computes per-row diagonal/off-diagonal counts;
  second sweep inserts entries with a NULL values array (MatSetValuesLocal() with NULL
  values only establishes the pattern).  When the domain is fully non-periodic the
  column indices are generated in sorted order, enabling the MAT_SORTED_FULL fast path.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J)
{
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,M,N;
  PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
  MPI_Comm               comm;
  DMBoundaryType         bx,by;
  ISLocalToGlobalMapping ltog,mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE,alreadyboundtocpu = PETSC_TRUE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  PetscCall(DMDAGetInfo(da,&dim,&m,&n,&M,&N,NULL,NULL,&nc,&s,&bx,&by,NULL,&st));
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) {
    PetscCall(MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE));
  }
  col  = 2*s + 1;                        /* stencil width in one direction */
  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
  PetscCall(DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  /* rows holds the nc row indices of one grid point; cols holds at most col*col*nc*nc column indices */
  PetscCall(PetscMalloc2(nc,&rows,col*col*nc*nc,&cols));
  PetscCall(DMGetLocalToGlobalMapping(da,&ltog));

  PetscCall(MatSetBlockSize(J,nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm,nc*nx*ny,nc*nx*ny,dnz,onz);
  for (i=xs; i<xs+nx; i++) {
    /* stencil offsets in x, clipped to the physical domain unless periodic */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

    for (j=ys; j<ys+ny; j++) {
      slot = i - gxs + gnx*(j - gys);    /* local (ghosted) natural index of this grid point */

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

      cnt = 0;
      for (k=0; k<nc; k++) {
        for (l=lstart; l<lend+1; l++) {
          for (p=pstart; p<pend+1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc*(slot + gnx*l + p);
            }
          }
        }
        rows[k] = k + nc*(slot);
      }
      if (removedups) {
        /* duplicate columns from periodic wrap-around must not be double counted */
        PetscCall(MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz));
      } else {
        PetscCall(MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz));
      }
    }
  }
  PetscCall(MatSetBlockSize(J,nc));
  /* only one of the two calls below acts, depending on whether J is sequential or parallel */
  PetscCall(MatSeqAIJSetPreallocation(J,0,dnz));
  PetscCall(MatMPIAIJSetPreallocation(J,0,dnz,0,onz));
  MatPreallocateEnd(dnz,onz);
  PetscCall(MatGetLocalToGlobalMapping(J,&mltog,NULL));
  if (!mltog) {
    PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));
  }

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i=xs; i<xs+nx; i++) {

      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

      for (j=ys; j<ys+ny; j++) {
        slot = i - gxs + gnx*(j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

        cnt = 0;
        for (l=lstart; l<lend+1; l++) {
          for (p=pstart; p<pend+1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
              /* all nc components of a neighbor are consecutive column indices */
              cols[cnt++] = nc*(slot + gnx*l + p);
              for (k=1; k<nc; k++) {
                cols[cnt] = 1 + cols[cnt-1];cnt++;
              }
            }
          }
        }
        for (k=0; k<nc; k++) rows[k] = k + nc*(slot);
        /* NULL values: only the nonzero pattern is inserted */
        PetscCall(MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES));
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBoundToCPU(J,&alreadyboundtocpu));
    PetscCall(MatBindToCPU(J,PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    if (!alreadyboundtocpu) PetscCall(MatBindToCPU(J,PETSC_FALSE));
    PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) {
      PetscCall(MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE));
    }
  }
  PetscCall(PetscFree2(rows,cols));
  PetscFunctionReturn(0);
}
1213 
/*
  DMCreateMatrix_DA_2d_MPIAIJ_Fill - Like DMCreateMatrix_DA_2d_MPIAIJ, but honors the
  per-component coupling patterns set with DMDASetBlockFills(): dd->dfill describes which
  components couple at the same grid point (diagonal block), dd->ofill which components
  couple with neighboring grid points (off-diagonal blocks).  Both are stored in the
  modified CSR layout built by DMDASetBlockFills_Private() (row pointers in [0..nc],
  column indices after).  Rows are processed one component (k) at a time.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)
{
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,maxcnt = 0,l,p,M,N;
  PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
  DM_DA                  *dd = (DM_DA*)da->data;
  PetscInt               ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
  MPI_Comm               comm;
  DMBoundaryType         bx,by;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  PetscCall(DMDAGetInfo(da,&dim,&m,&n,&M,&N,NULL,NULL,&nc,&s,&bx,&by,NULL,&st));
  col  = 2*s + 1;                        /* stencil width in one direction */
  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
  PetscCall(DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  /* one row at a time is generated, so cols needs at most col*col*nc entries */
  PetscCall(PetscMalloc1(col*col*nc,&cols));
  PetscCall(DMGetLocalToGlobalMapping(da,&ltog));

  PetscCall(MatSetBlockSize(J,nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm,nc*nx*ny,nc*nx*ny,dnz,onz);
  for (i=xs; i<xs+nx; i++) {

    /* stencil offsets in x, clipped to the physical domain unless periodic */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

    for (j=ys; j<ys+ny; j++) {
      slot = i - gxs + gnx*(j - gys);    /* local (ghosted) natural index of this grid point */

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

      for (k=0; k<nc; k++) {
        cnt = 0;
        for (l=lstart; l<lend+1; l++) {
          for (p=pstart; p<pend+1; p++) {
            if (l || p) {
              /* neighboring grid point: couple component k per the ofill pattern */
              if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
                for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
              }
            } else {
              /* same grid point: use dfill if given, otherwise couple all nc components */
              if (dfill) {
                for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
              } else {
                for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
              }
            }
          }
        }
        row    = k + nc*(slot);
        maxcnt = PetscMax(maxcnt,cnt);
        if (removedups) {
          /* duplicate columns from periodic wrap-around must not be double counted */
          PetscCall(MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz));
        } else {
          PetscCall(MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz));
        }
      }
    }
  }
  /* only one of the two calls below acts, depending on whether J is sequential or parallel */
  PetscCall(MatSeqAIJSetPreallocation(J,0,dnz));
  PetscCall(MatMPIAIJSetPreallocation(J,0,dnz,0,onz));
  MatPreallocateEnd(dnz,onz);
  PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i=xs; i<xs+nx; i++) {

      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

      for (j=ys; j<ys+ny; j++) {
        slot = i - gxs + gnx*(j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

        for (k=0; k<nc; k++) {
          cnt = 0;
          for (l=lstart; l<lend+1; l++) {
            for (p=pstart; p<pend+1; p++) {
              if (l || p) {
                if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
                  for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
                }
              } else {
                if (dfill) {
                  for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
                } else {
                  for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
                }
              }
            }
          }
          row  = k + nc*(slot);
          /* NULL values: only the nonzero pattern is inserted */
          PetscCall(MatSetValuesLocal(J,1,&row,cnt,cols,NULL,INSERT_VALUES));
        }
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J,PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J,PETSC_FALSE));
    PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(0);
}
1343 
1344 /* ---------------------------------------------------------------------------------*/
1345 
/*
  DMCreateMatrix_DA_3d_MPIAIJ - Preallocates (and, unless da->prealloc_only is set,
  assembles the zero nonzero-pattern of) an AIJ matrix J for a 3d DMDA stencil.

  The counting sweep and the insertion sweep intentionally use different loop nestings:
  the insertion sweep iterates kk (outer) / jj / ii (inner) so that the generated column
  indices increase monotonically, which is what the MAT_SORTED_FULL fast path (enabled
  for fully non-periodic domains) relies on.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J)
{
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
  PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
  MPI_Comm               comm;
  DMBoundaryType         bx,by,bz;
  ISLocalToGlobalMapping ltog,mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  PetscCall(DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st));
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) {
    PetscCall(MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE));
  }
  col  = 2*s + 1;                        /* stencil width in one direction */

  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
  if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;

  PetscCall(DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  /* rows holds the nc row indices of one grid point; cols holds at most col^3*nc*nc column indices */
  PetscCall(PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols));
  PetscCall(DMGetLocalToGlobalMapping(da,&ltog));

  PetscCall(MatSetBlockSize(J,nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);
  for (i=xs; i<xs+nx; i++) {
    /* stencil offsets per direction, clipped to the physical domain unless periodic */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
      for (k=zs; k<zs+nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

        slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);  /* local (ghosted) natural index */

        cnt = 0;
        for (l=0; l<nc; l++) {
          for (ii=istart; ii<iend+1; ii++) {
            for (jj=jstart; jj<jend+1; jj++) {
              for (kk=kstart; kk<kend+1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                  cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                }
              }
            }
          }
          rows[l] = l + nc*(slot);
        }
        if (removedups) {
          /* duplicate columns from periodic wrap-around must not be double counted */
          PetscCall(MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz));
        } else {
          PetscCall(MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz));
        }
      }
    }
  }
  PetscCall(MatSetBlockSize(J,nc));
  /* only one of the two calls below acts, depending on whether J is sequential or parallel */
  PetscCall(MatSeqAIJSetPreallocation(J,0,dnz));
  PetscCall(MatMPIAIJSetPreallocation(J,0,dnz,0,onz));
  MatPreallocateEnd(dnz,onz);
  PetscCall(MatGetLocalToGlobalMapping(J,&mltog,NULL));
  if (!mltog) {
    PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));
  }

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
        for (k=zs; k<zs+nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

          slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

          cnt = 0;
          /* kk/jj/ii nesting generates columns in increasing local index order */
          for (kk=kstart; kk<kend+1; kk++) {
            for (jj=jstart; jj<jend+1; jj++) {
              for (ii=istart; ii<iend+1; ii++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                  /* all nc components of a neighbor are consecutive column indices */
                  cols[cnt++] = nc*(slot + ii + gnx*jj + gnx*gny*kk);
                    for (l=1; l<nc; l++) {
                      cols[cnt] = 1 + cols[cnt-1];cnt++;
                  }
                }
              }
            }
          }
          rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
          /* NULL values: only the nonzero pattern is inserted */
          PetscCall(MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES));
        }
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J,PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) {
      PetscCall(MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE));
    }
    PetscCall(MatBindToCPU(J,PETSC_FALSE));
    PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows,cols));
  PetscFunctionReturn(0);
}
1478 
1479 /* ---------------------------------------------------------------------------------*/
1480 
1481 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J)
1482 {
1483   DM_DA                  *dd = (DM_DA*)da->data;
1484   PetscInt               xs,nx,i,j,gxs,gnx,row,k,l;
1485   PetscInt               m,dim,s,*cols = NULL,nc,cnt,maxcnt = 0,*ocols;
1486   PetscInt               *ofill = dd->ofill,*dfill = dd->dfill;
1487   DMBoundaryType         bx;
1488   ISLocalToGlobalMapping ltog;
1489   PetscMPIInt            rank,size;
1490 
1491   PetscFunctionBegin;
1492   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)da),&rank));
1493   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da),&size));
1494 
1495   /*
1496          nc - number of components per grid point
1497 
1498   */
1499   PetscCall(DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL));
1500   PetscCheck(s <= 1,PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Matrix creation for 1d not implemented correctly for stencil width larger than 1");
1501   PetscCall(DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL));
1502   PetscCall(DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL));
1503 
1504   PetscCall(MatSetBlockSize(J,nc));
1505   PetscCall(PetscCalloc2(nx*nc,&cols,nx*nc,&ocols));
1506 
1507   /*
1508         note should be smaller for first and last process with no periodic
1509         does not handle dfill
1510   */
1511   cnt = 0;
1512   /* coupling with process to the left */
1513   for (i=0; i<s; i++) {
1514     for (j=0; j<nc; j++) {
1515       ocols[cnt] = ((rank == 0) ? 0 : (s - i)*(ofill[j+1] - ofill[j]));
1516       cols[cnt]  = dfill[j+1] - dfill[j] + (s + i)*(ofill[j+1] - ofill[j]);
1517       if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1518         if (size > 1) ocols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
1519         else cols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
1520       }
1521       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1522       cnt++;
1523     }
1524   }
1525   for (i=s; i<nx-s; i++) {
1526     for (j=0; j<nc; j++) {
1527       cols[cnt] = dfill[j+1] - dfill[j] + 2*s*(ofill[j+1] - ofill[j]);
1528       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1529       cnt++;
1530     }
1531   }
1532   /* coupling with process to the right */
1533   for (i=nx-s; i<nx; i++) {
1534     for (j=0; j<nc; j++) {
1535       ocols[cnt] = ((rank == (size-1)) ? 0 : (i - nx + s + 1)*(ofill[j+1] - ofill[j]));
1536       cols[cnt]  = dfill[j+1] - dfill[j] + (s + nx - i - 1)*(ofill[j+1] - ofill[j]);
1537       if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1538         if (size > 1) ocols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
1539         else cols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
1540       }
1541       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1542       cnt++;
1543     }
1544   }
1545 
1546   PetscCall(MatSeqAIJSetPreallocation(J,0,cols));
1547   PetscCall(MatMPIAIJSetPreallocation(J,0,cols,0,ocols));
1548   PetscCall(PetscFree2(cols,ocols));
1549 
1550   PetscCall(DMGetLocalToGlobalMapping(da,&ltog));
1551   PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));
1552 
1553   /*
1554     For each node in the grid: we get the neighbors in the local (on processor ordering
1555     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1556     PETSc ordering.
1557   */
1558   if (!da->prealloc_only) {
1559     PetscCall(PetscMalloc1(maxcnt,&cols));
1560     row = xs*nc;
1561     /* coupling with process to the left */
1562     for (i=xs; i<xs+s; i++) {
1563       for (j=0; j<nc; j++) {
1564         cnt = 0;
1565         if (rank) {
1566           for (l=0; l<s; l++) {
1567             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1568           }
1569         }
1570         if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1571           for (l=0; l<s; l++) {
1572             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (m + i - s - l)*nc + ofill[k];
1573           }
1574         }
1575         if (dfill) {
1576           for (k=dfill[j]; k<dfill[j+1]; k++) {
1577             cols[cnt++] = i*nc + dfill[k];
1578           }
1579         } else {
1580           for (k=0; k<nc; k++) {
1581             cols[cnt++] = i*nc + k;
1582           }
1583         }
1584         for (l=0; l<s; l++) {
1585           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1586         }
1587         PetscCall(MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES));
1588         row++;
1589       }
1590     }
1591     for (i=xs+s; i<xs+nx-s; i++) {
1592       for (j=0; j<nc; j++) {
1593         cnt = 0;
1594         for (l=0; l<s; l++) {
1595           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1596         }
1597         if (dfill) {
1598           for (k=dfill[j]; k<dfill[j+1]; k++) {
1599             cols[cnt++] = i*nc + dfill[k];
1600           }
1601         } else {
1602           for (k=0; k<nc; k++) {
1603             cols[cnt++] = i*nc + k;
1604           }
1605         }
1606         for (l=0; l<s; l++) {
1607           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1608         }
1609         PetscCall(MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES));
1610         row++;
1611       }
1612     }
1613     /* coupling with process to the right */
1614     for (i=xs+nx-s; i<xs+nx; i++) {
1615       for (j=0; j<nc; j++) {
1616         cnt = 0;
1617         for (l=0; l<s; l++) {
1618           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1619         }
1620         if (dfill) {
1621           for (k=dfill[j]; k<dfill[j+1]; k++) {
1622             cols[cnt++] = i*nc + dfill[k];
1623           }
1624         } else {
1625           for (k=0; k<nc; k++) {
1626             cols[cnt++] = i*nc + k;
1627           }
1628         }
1629         if (rank < size-1) {
1630           for (l=0; l<s; l++) {
1631             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1632           }
1633         }
1634         if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1635           for (l=0; l<s; l++) {
1636             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s - l - m + 2)*nc + ofill[k];
1637           }
1638         }
1639         PetscCall(MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES));
1640         row++;
1641       }
1642     }
1643     PetscCall(PetscFree(cols));
1644     /* do not copy values to GPU since they are all zero and not yet needed there */
1645     PetscCall(MatBindToCPU(J,PETSC_TRUE));
1646     PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
1647     PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
1648     PetscCall(MatBindToCPU(J,PETSC_FALSE));
1649     PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
1650   }
1651   PetscFunctionReturn(0);
1652 }
1653 
1654 /* ---------------------------------------------------------------------------------*/
1655 
1656 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J)
1657 {
1658   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1659   PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
1660   PetscInt               istart,iend;
1661   DMBoundaryType         bx;
1662   ISLocalToGlobalMapping ltog,mltog;
1663 
1664   PetscFunctionBegin;
1665   /*
1666          nc - number of components per grid point
1667          col - number of colors needed in one direction for single component problem
1668 
1669   */
1670   PetscCall(DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL));
1671   if (bx == DM_BOUNDARY_NONE) {
1672     PetscCall(MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE));
1673   }
1674   col  = 2*s + 1;
1675 
1676   PetscCall(DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL));
1677   PetscCall(DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL));
1678 
1679   PetscCall(MatSetBlockSize(J,nc));
1680   PetscCall(MatSeqAIJSetPreallocation(J,col*nc,NULL));
1681   PetscCall(MatMPIAIJSetPreallocation(J,col*nc,NULL,col*nc,NULL));
1682 
1683   PetscCall(DMGetLocalToGlobalMapping(da,&ltog));
1684   PetscCall(MatGetLocalToGlobalMapping(J,&mltog,NULL));
1685   if (!mltog) {
1686     PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));
1687   }
1688 
1689   /*
1690     For each node in the grid: we get the neighbors in the local (on processor ordering
1691     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1692     PETSc ordering.
1693   */
1694   if (!da->prealloc_only) {
1695     PetscCall(PetscMalloc2(nc,&rows,col*nc*nc,&cols));
1696     for (i=xs; i<xs+nx; i++) {
1697       istart = PetscMax(-s,gxs - i);
1698       iend   = PetscMin(s,gxs + gnx - i - 1);
1699       slot   = i - gxs;
1700 
1701       cnt = 0;
1702       for (i1=istart; i1<iend+1; i1++) {
1703         cols[cnt++] = nc*(slot + i1);
1704         for (l=1; l<nc; l++) {
1705           cols[cnt] = 1 + cols[cnt-1];cnt++;
1706         }
1707       }
1708       rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
1709       PetscCall(MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES));
1710     }
1711     /* do not copy values to GPU since they are all zero and not yet needed there */
1712     PetscCall(MatBindToCPU(J,PETSC_TRUE));
1713     PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
1714     PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
1715     if (bx == DM_BOUNDARY_NONE) {
1716       PetscCall(MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE));
1717     }
1718     PetscCall(MatBindToCPU(J,PETSC_FALSE));
1719     PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
1720     PetscCall(PetscFree2(rows,cols));
1721   }
1722   PetscFunctionReturn(0);
1723 }
1724 
1725 /* ---------------------------------------------------------------------------------*/
1726 
/*
   DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation - creates an AIJ matrix for a 1d DMDA
   using a single total-nonzero bound (MatSeqAIJSetTotalPreallocation) rather than
   per-row counts, then (unless da->prealloc_only) inserts explicit zeros in the full
   stencil pattern so the nonzero structure is fixed.
*/
PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da,Mat J)
{
  PetscInt               xs,nx,i,i1,slot,gxs,gnx;
  PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
  PetscInt               istart,iend;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog,mltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL));
  col  = 2*s + 1;

  PetscCall(DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL));
  PetscCall(DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL));

  PetscCall(MatSetBlockSize(J,nc));
  /* upper bound: nx*nc local rows, each with at most col*nc columns */
  PetscCall(MatSeqAIJSetTotalPreallocation(J,nx*nc*col*nc));

  PetscCall(DMGetLocalToGlobalMapping(da,&ltog));
  PetscCall(MatGetLocalToGlobalMapping(J,&mltog,NULL));
  if (!mltog) {
    PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));
  }

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscMalloc2(nc,&rows,col*nc*nc,&cols));
    for (i=xs; i<xs+nx; i++) {
      /* clip the stencil to the ghosted local region */
      istart = PetscMax(-s,gxs - i);
      iend   = PetscMin(s,gxs + gnx - i - 1);
      slot   = i - gxs;  /* local (ghosted) index of this node */

      cnt = 0;
      for (i1=istart; i1<iend+1; i1++) {
        /* one run of nc consecutive component columns per neighboring node */
        cols[cnt++] = nc*(slot + i1);
        for (l=1; l<nc; l++) {
          cols[cnt] = 1 + cols[cnt-1];cnt++;
        }
      }
      rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
      PetscCall(MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES));
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J,PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    if (bx == DM_BOUNDARY_NONE) {
      PetscCall(MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE));
    }
    PetscCall(MatBindToCPU(J,PETSC_FALSE));
    PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
    PetscCall(PetscFree2(rows,cols));
  }
  /* NOTE(review): this reset also runs when da->prealloc_only is set (unlike the
     conditional reset above); presumably intentional since MatSeqAIJSetTotalPreallocation
     is used either way -- confirm */
  PetscCall(MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE));
  PetscFunctionReturn(0);
}
1791 
/*
   DMCreateMatrix_DA_2d_MPIBAIJ - preallocates a BAIJ matrix (block size nc) for a
   2d DMDA, with one block row/column per grid node, and (unless da->prealloc_only)
   inserts explicit zero blocks in the full stencil pattern.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
{
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
  PetscInt               istart,iend,jstart,jend,ii,jj;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st));
  col  = 2*s + 1;

  PetscCall(DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  PetscCall(PetscMalloc1(col*col*nc*nc,&cols));

  PetscCall(DMGetLocalToGlobalMapping(da,&ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm,nx*ny,nx*ny,dnz,onz);
  for (i=xs; i<xs+nx; i++) {
    /* clip the stencil at the physical boundary unless the direction is periodic */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
      slot   = i - gxs + gnx*(j - gys);  /* local (ghosted) block row */

      /* Find block columns in block row */
      cnt = 0;
      for (ii=istart; ii<iend+1; ii++) {
        for (jj=jstart; jj<jend+1; jj++) {
          if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
            cols[cnt++] = slot + ii + gnx*jj;
          }
        }
      }
      PetscCall(MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz));
    }
  }
  PetscCall(MatSeqBAIJSetPreallocation(J,nc,0,dnz));
  PetscCall(MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz));
  MatPreallocateEnd(dnz,onz);

  PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* values is a zeroed nc*nc block (times the max number of block columns) */
    PetscCall(PetscCalloc1(col*col*nc*nc,&values));
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
        slot = i - gxs + gnx*(j - gys);
        cnt  = 0;
        for (ii=istart; ii<iend+1; ii++) {
          for (jj=jstart; jj<jend+1; jj++) {
            if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
              cols[cnt++] = slot + ii + gnx*jj;
            }
          }
        }
        PetscCall(MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J,PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J,PETSC_FALSE));
    PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(0);
}
1883 
/*
   DMCreateMatrix_DA_3d_MPIBAIJ - preallocates a BAIJ matrix (block size nc) for a
   3d DMDA, with one block row/column per grid node, and (unless da->prealloc_only)
   inserts explicit zero blocks in the full stencil pattern.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
{
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
  PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by,bz;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  PetscCall(DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st));
  col  = 2*s + 1;

  PetscCall(DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  PetscCall(PetscMalloc1(col*col*col,&cols));

  PetscCall(DMGetLocalToGlobalMapping(da,&ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm,nx*ny*nz,nx*ny*nz,dnz,onz);
  for (i=xs; i<xs+nx; i++) {
    /* clip the stencil at the physical boundary unless the direction is periodic */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
      for (k=zs; k<zs+nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

        slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);  /* local (ghosted) block row */

        /* Find block columns in block row */
        cnt = 0;
        for (ii=istart; ii<iend+1; ii++) {
          for (jj=jstart; jj<jend+1; jj++) {
            for (kk=kstart; kk<kend+1; kk++) {
              if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
              }
            }
          }
        }
        PetscCall(MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz));
      }
    }
  }
  PetscCall(MatSeqBAIJSetPreallocation(J,nc,0,dnz));
  PetscCall(MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz));
  MatPreallocateEnd(dnz,onz);

  PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* values is a zeroed nc*nc block (times the max number of block columns) */
    PetscCall(PetscCalloc1(col*col*col*nc*nc,&values));
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
        for (k=zs; k<zs+nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

          slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

          cnt = 0;
          for (ii=istart; ii<iend+1; ii++) {
            for (jj=jstart; jj<jend+1; jj++) {
              for (kk=kstart; kk<kend+1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                  cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
                }
              }
            }
          }
          PetscCall(MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J,PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J,PETSC_FALSE));
    PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(0);
}
1991 
1992 /*
  This helper is for SBAIJ preallocation, to discard the lower-triangular values, which are difficult to
1994   identify in the local ordering with periodic domain.
1995 */
1996 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[])
1997 {
1998   PetscInt       i,n;
1999 
2000   PetscFunctionBegin;
2001   PetscCall(ISLocalToGlobalMappingApplyBlock(ltog,1,row,row));
2002   PetscCall(ISLocalToGlobalMappingApplyBlock(ltog,*cnt,col,col));
2003   for (i=0,n=0; i<*cnt; i++) {
2004     if (col[i] >= *row) col[n++] = col[i];
2005   }
2006   *cnt = n;
2007   PetscFunctionReturn(0);
2008 }
2009 
/*
   DMCreateMatrix_DA_2d_MPISBAIJ - preallocates a symmetric SBAIJ matrix (block size
   nc) for a 2d DMDA; only the upper-triangular block columns are kept (via
   L2GFilterUpperTriangular), and (unless da->prealloc_only) explicit zero blocks are
   inserted in that pattern.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J)
{
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
  PetscInt               istart,iend,jstart,jend,ii,jj;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st));
  col  = 2*s + 1;

  PetscCall(DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  PetscCall(PetscMalloc1(col*col*nc*nc,&cols));

  PetscCall(DMGetLocalToGlobalMapping(da,&ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm,nx*ny,nx*ny,dnz,onz);
  for (i=xs; i<xs+nx; i++) {
    /* clip the stencil at the physical boundary unless the direction is periodic */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
      slot   = i - gxs + gnx*(j - gys);  /* local (ghosted) block row */

      /* Find block columns in block row */
      cnt = 0;
      for (ii=istart; ii<iend+1; ii++) {
        for (jj=jstart; jj<jend+1; jj++) {
          if (st == DMDA_STENCIL_BOX || !ii || !jj) {
            cols[cnt++] = slot + ii + gnx*jj;
          }
        }
      }
      /* map to global block indices and drop the lower-triangular columns;
         note: slot becomes a global index here, so the Set calls below use
         global (not Local) variants */
      PetscCall(L2GFilterUpperTriangular(ltog,&slot,&cnt,cols));
      PetscCall(MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz));
    }
  }
  PetscCall(MatSeqSBAIJSetPreallocation(J,nc,0,dnz));
  PetscCall(MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz));
  MatPreallocateEnd(dnz,onz);

  PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* values is a zeroed nc*nc block (times the max number of block columns) */
    PetscCall(PetscCalloc1(col*col*nc*nc,&values));
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
        slot   = i - gxs + gnx*(j - gys);

        /* Find block columns in block row */
        cnt = 0;
        for (ii=istart; ii<iend+1; ii++) {
          for (jj=jstart; jj<jend+1; jj++) {
            if (st == DMDA_STENCIL_BOX || !ii || !jj) {
              cols[cnt++] = slot + ii + gnx*jj;
            }
          }
        }
        PetscCall(L2GFilterUpperTriangular(ltog,&slot,&cnt,cols));
        PetscCall(MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J,PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J,PETSC_FALSE));
    PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(0);
}
2105 
/*
   DMCreateMatrix_DA_3d_MPISBAIJ - preallocates a symmetric SBAIJ matrix (block size
   nc) for a 3d DMDA; only the upper-triangular block columns are kept (via
   L2GFilterUpperTriangular), and (unless da->prealloc_only) explicit zero blocks are
   inserted in that pattern.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J)
{
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
  PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by,bz;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st));
  col  = 2*s + 1;

  PetscCall(DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  /* create the matrix */
  PetscCall(PetscMalloc1(col*col*col,&cols));

  PetscCall(DMGetLocalToGlobalMapping(da,&ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm,nx*ny*nz,nx*ny*nz,dnz,onz);
  for (i=xs; i<xs+nx; i++) {
    /* clip the stencil at the physical boundary unless the direction is periodic */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
      for (k=zs; k<zs+nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

        slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);  /* local (ghosted) block row */

        /* Find block columns in block row */
        cnt = 0;
        for (ii=istart; ii<iend+1; ii++) {
          for (jj=jstart; jj<jend+1; jj++) {
            for (kk=kstart; kk<kend+1; kk++) {
              if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
                cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
              }
            }
          }
        }
        /* map to global block indices and drop the lower-triangular columns;
           note: slot becomes a global index here, so the Set calls below use
           global (not Local) variants */
        PetscCall(L2GFilterUpperTriangular(ltog,&slot,&cnt,cols));
        PetscCall(MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz));
      }
    }
  }
  PetscCall(MatSeqSBAIJSetPreallocation(J,nc,0,dnz));
  PetscCall(MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz));
  MatPreallocateEnd(dnz,onz);

  PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* values is a zeroed nc*nc block (times the max number of block columns) */
    PetscCall(PetscCalloc1(col*col*col*nc*nc,&values));
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
        for (k=zs; k<zs+nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

          slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

          cnt = 0;
          for (ii=istart; ii<iend+1; ii++) {
            for (jj=jstart; jj<jend+1; jj++) {
              for (kk=kstart; kk<kend+1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
                  cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
                }
              }
            }
          }
          PetscCall(L2GFilterUpperTriangular(ltog,&slot,&cnt,cols));
          PetscCall(MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J,PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J,PETSC_FALSE));
    PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(0);
}
2215 
2216 /* ---------------------------------------------------------------------------------*/
2217 
/*
  DMCreateMatrix_DA_3d_MPIAIJ_Fill - Preallocates (and, unless da->prealloc_only is set,
  inserts explicit zeros into) the nonzero structure of an AIJ matrix J for a 3d DMDA,
  honoring the per-component coupling patterns dd->dfill (diagonal block) and dd->ofill
  (off-diagonal/stencil neighbors).

  dfill/ofill use the modified CSR layout built by DMDASetBlockFills_Private() at the top
  of this file: entries 0..nc are row starts (entry nc is the terminating end marker), so
  the coupled column components for component l are fill[fill[l]] .. fill[fill[l+1]-1].

  Two passes over the owned grid points are made with identical stencil/column logic:
  pass 1 counts columns per row to build the dnz/onz preallocation arrays, pass 2 inserts
  zeros so the nonzero pattern is locked in (MAT_NEW_NONZERO_LOCATION_ERR is set at the end).
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J)
{
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt, maxcnt = 0,l,p,*dnz,*onz;
  PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
  DM_DA                  *dd = (DM_DA*)da->data;
  PetscInt               ifill_col,*dfill = dd->dfill,*ofill = dd->ofill;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by,bz;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  PetscCall(DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st));
  /* stencil width in one direction: s points on each side plus the center */
  col  = 2*s + 1;
  PetscCheck(bx != DM_BOUNDARY_PERIODIC || (m % col) == 0,PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
                 by 2*stencil_width + 1\n");
  PetscCheck(by != DM_BOUNDARY_PERIODIC || (n % col) == 0,PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
                 by 2*stencil_width + 1\n");
  PetscCheck(bz != DM_BOUNDARY_PERIODIC || (p % col) == 0,PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
                 by 2*stencil_width + 1\n");

  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
  if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;

  PetscCall(DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  /* scratch array large enough for the densest possible row: full box stencil times nc components */
  PetscCall(PetscMalloc1(col*col*col*nc,&cols));
  PetscCall(DMGetLocalToGlobalMapping(da,&ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);

  PetscCall(MatSetBlockSize(J,nc));
  for (i=xs; i<xs+nx; i++) {
    /* stencil extent in each direction: full [-s,s] when periodic, otherwise clipped at the physical boundary */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
      for (k=zs; k<zs+nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

        /* local (ghosted) linear index of grid point (i,j,k) */
        slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

        /* for each component l of this grid point, gather the local column indices it couples to */
        for (l=0; l<nc; l++) {
          cnt = 0;
          for (ii=istart; ii<iend+1; ii++) {
            for (jj=jstart; jj<jend+1; jj++) {
              for (kk=kstart; kk<kend+1; kk++) {
                if (ii || jj || kk) {
                  /* off-diagonal grid point: include it for BOX stencils always, for STAR only on the axes; columns come from ofill */
                  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                    for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                  }
                } else {
                  /* diagonal grid point: use the dfill pattern if given, otherwise couple all nc components densely */
                  if (dfill) {
                    for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                  } else {
                    for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                  }
                }
              }
            }
          }
          row  = l + nc*(slot);
          /* track the widest row so the zero-valued insertion buffer below can be sized once */
          maxcnt = PetscMax(maxcnt,cnt);
          if (removedups) {
            PetscCall(MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz));
          } else {
            PetscCall(MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz));
          }
        }
      }
    }
  }
  /* both are called; only the one matching the actual type of J takes effect */
  PetscCall(MatSeqAIJSetPreallocation(J,0,dnz));
  PetscCall(MatMPIAIJSetPreallocation(J,0,dnz,0,onz));
  MatPreallocateEnd(dnz,onz);
  PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* second pass repeats the column computation above and inserts zeros to lock in the nonzero pattern */
    PetscCall(PetscCalloc1(maxcnt,&values));
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
        for (k=zs; k<zs+nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

          slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

          for (l=0; l<nc; l++) {
            cnt = 0;
            for (ii=istart; ii<iend+1; ii++) {
              for (jj=jstart; jj<jend+1; jj++) {
                for (kk=kstart; kk<kend+1; kk++) {
                  if (ii || jj || kk) {
                    if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                      for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                    }
                  } else {
                    if (dfill) {
                      for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                    } else {
                      for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                    }
                  }
                }
              }
            }
            row  = l + nc*(slot);
            PetscCall(MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES));
          }
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J,PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J,PETSC_FALSE));
    /* any later insertion outside this pattern is an error, catching stencil mistakes early */
    PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(0);
}
2368