xref: /petsc/src/dm/impls/da/fdda.c (revision daa037dfd3c3bec8dc8659548d2b20b07c1dc6de)
1 
2 #include <petsc/private/dmdaimpl.h> /*I      "petscdmda.h"     I*/
3 #include <petscmat.h>
4 #include <petscbt.h>
5 
6 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring*);
7 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring*);
8 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring*);
9 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring*);
10 
/*
   For a ghost index i that may be negative or greater than the upper bound,
   map it into the 0:m-1 range using periodicity.

   Arguments are fully parenthesized so that expression arguments (e.g. m+1)
   expand correctly; note i and m may be evaluated more than once, so do not
   pass expressions with side effects.
*/
#define SetInRange(i,m) (((i) < 0) ? (m)+(i) : (((i) >= (m)) ? (i)-(m) : (i)))
16 
17 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill,PetscInt w,PetscInt **rfill)
18 {
19   PetscInt       i,j,nz,*fill;
20 
21   PetscFunctionBegin;
22   if (!dfill) PetscFunctionReturn(0);
23 
24   /* count number nonzeros */
25   nz = 0;
26   for (i=0; i<w; i++) {
27     for (j=0; j<w; j++) {
28       if (dfill[w*i+j]) nz++;
29     }
30   }
31   PetscCall(PetscMalloc1(nz + w + 1,&fill));
32   /* construct modified CSR storage of nonzero structure */
33   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
34    so fill[1] - fill[0] gives number of nonzeros in first row etc */
35   nz = w + 1;
36   for (i=0; i<w; i++) {
37     fill[i] = nz;
38     for (j=0; j<w; j++) {
39       if (dfill[w*i+j]) {
40         fill[nz] = j;
41         nz++;
42       }
43     }
44   }
45   fill[w] = nz;
46 
47   *rfill = fill;
48   PetscFunctionReturn(0);
49 }
50 
51 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse,PetscInt w,PetscInt **rfill)
52 {
53   PetscInt       nz;
54 
55   PetscFunctionBegin;
56   if (!dfillsparse) PetscFunctionReturn(0);
57 
58   /* Determine number of non-zeros */
59   nz = (dfillsparse[w] - w - 1);
60 
61   /* Allocate space for our copy of the given sparse matrix representation. */
62   PetscCall(PetscMalloc1(nz + w + 1,rfill));
63   PetscCall(PetscArraycpy(*rfill,dfillsparse,nz+w+1));
64   PetscFunctionReturn(0);
65 }
66 
67 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
68 {
69   PetscInt       i,k,cnt = 1;
70 
71   PetscFunctionBegin;
72 
73   /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of
74    columns to the left with any nonzeros in them plus 1 */
75   PetscCall(PetscCalloc1(dd->w,&dd->ofillcols));
76   for (i=0; i<dd->w; i++) {
77     for (k=dd->ofill[i]; k<dd->ofill[i+1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
78   }
79   for (i=0; i<dd->w; i++) {
80     if (dd->ofillcols[i]) {
81       dd->ofillcols[i] = cnt++;
82     }
83   }
84   PetscFunctionReturn(0);
85 }
86 
87 /*@
88     DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
89     of the matrix returned by DMCreateMatrix().
90 
91     Logically Collective on da
92 
93     Input Parameters:
94 +   da - the distributed array
95 .   dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
96 -   ofill - the fill pattern in the off-diagonal blocks
97 
98     Level: developer
99 
100     Notes:
101     This only makes sense when you are doing multicomponent problems but using the
102        MPIAIJ matrix format
103 
104            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
105        representing coupling and 0 entries for missing coupling. For example
106 $             dfill[9] = {1, 0, 0,
107 $                         1, 1, 0,
108 $                         0, 1, 1}
109        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
110        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
111        diagonal block).
112 
     DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns than
     can be represented in the dfill, ofill format
115 
116    Contributed by Glenn Hammond
117 
118 .seealso DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()
119 
120 @*/
121 PetscErrorCode  DMDASetBlockFills(DM da,const PetscInt *dfill,const PetscInt *ofill)
122 {
123   DM_DA          *dd = (DM_DA*)da->data;
124 
125   PetscFunctionBegin;
126   /* save the given dfill and ofill information */
127   PetscCall(DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill));
128   PetscCall(DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill));
129 
130   /* count nonzeros in ofill columns */
131   PetscCall(DMDASetBlockFills_Private2(dd));
132 
133   PetscFunctionReturn(0);
134 }
135 
136 /*@
137     DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
138     of the matrix returned by DMCreateMatrix(), using sparse representations
139     of fill patterns.
140 
141     Logically Collective on da
142 
143     Input Parameters:
144 +   da - the distributed array
145 .   dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block)
146 -   ofill - the sparse fill pattern in the off-diagonal blocks
147 
148     Level: developer
149 
150     Notes: This only makes sense when you are doing multicomponent problems but using the
151        MPIAIJ matrix format
152 
153            The format for dfill and ofill is a sparse representation of a
154            dof-by-dof matrix with 1 entries representing coupling and 0 entries
155            for missing coupling.  The sparse representation is a 1 dimensional
156            array of length nz + dof + 1, where nz is the number of non-zeros in
157            the matrix.  The first dof entries in the array give the
158            starting array indices of each row's items in the rest of the array,
159            the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
160            and the remaining nz items give the column indices of each of
161            the 1s within the logical 2D matrix.  Each row's items within
162            the array are the column indices of the 1s within that row
163            of the 2D matrix.  PETSc developers may recognize that this is the
164            same format as that computed by the DMDASetBlockFills_Private()
165            function from a dense 2D matrix representation.
166 
     DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns than
     can be represented in the dfill, ofill format
169 
170    Contributed by Philip C. Roth
171 
172 .seealso DMDASetBlockFills(), DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()
173 
174 @*/
175 PetscErrorCode  DMDASetBlockFillsSparse(DM da,const PetscInt *dfillsparse,const PetscInt *ofillsparse)
176 {
177   DM_DA          *dd = (DM_DA*)da->data;
178 
179   PetscFunctionBegin;
180   /* save the given dfill and ofill information */
181   PetscCall(DMDASetBlockFillsSparse_Private(dfillsparse,dd->w,&dd->dfill));
182   PetscCall(DMDASetBlockFillsSparse_Private(ofillsparse,dd->w,&dd->ofill));
183 
184   /* count nonzeros in ofill columns */
185   PetscCall(DMDASetBlockFills_Private2(dd));
186 
187   PetscFunctionReturn(0);
188 }
189 
190 PetscErrorCode  DMCreateColoring_DA(DM da,ISColoringType ctype,ISColoring *coloring)
191 {
192   PetscInt         dim,m,n,p,nc;
193   DMBoundaryType   bx,by,bz;
194   MPI_Comm         comm;
195   PetscMPIInt      size;
196   PetscBool        isBAIJ;
197   DM_DA            *dd = (DM_DA*)da->data;
198 
199   PetscFunctionBegin;
200   /*
201                                   m
202           ------------------------------------------------------
203          |                                                     |
204          |                                                     |
205          |               ----------------------                |
206          |               |                    |                |
207       n  |           yn  |                    |                |
208          |               |                    |                |
209          |               .---------------------                |
210          |             (xs,ys)     xn                          |
211          |            .                                        |
212          |         (gxs,gys)                                   |
213          |                                                     |
214           -----------------------------------------------------
215   */
216 
217   /*
218          nc - number of components per grid point
219          col - number of colors needed in one direction for single component problem
220 
221   */
222   PetscCall(DMDAGetInfo(da,&dim,NULL,NULL,NULL,&m,&n,&p,&nc,NULL,&bx,&by,&bz,NULL));
223 
224   PetscCall(PetscObjectGetComm((PetscObject)da,&comm));
225   PetscCallMPI(MPI_Comm_size(comm,&size));
226   if (ctype == IS_COLORING_LOCAL) {
227     if (size == 1) {
228       ctype = IS_COLORING_GLOBAL;
229     } else if (dim > 1) {
230       if ((m==1 && bx == DM_BOUNDARY_PERIODIC) || (n==1 && by == DM_BOUNDARY_PERIODIC) || (p==1 && bz == DM_BOUNDARY_PERIODIC)) {
231         SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain  on the same process");
232       }
233     }
234   }
235 
236   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
237      matrices is for the blocks, not the individual matrix elements  */
238   PetscCall(PetscStrbeginswith(da->mattype,MATBAIJ,&isBAIJ));
239   if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype,MATMPIBAIJ,&isBAIJ));
240   if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype,MATSEQBAIJ,&isBAIJ));
241   if (isBAIJ) {
242     dd->w  = 1;
243     dd->xs = dd->xs/nc;
244     dd->xe = dd->xe/nc;
245     dd->Xs = dd->Xs/nc;
246     dd->Xe = dd->Xe/nc;
247   }
248 
249   /*
250      We do not provide a getcoloring function in the DMDA operations because
251    the basic DMDA does not know about matrices. We think of DMDA as being
252    more low-level then matrices.
253   */
254   if (dim == 1) {
255     PetscCall(DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring));
256   } else if (dim == 2) {
257     PetscCall(DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring));
258   } else if (dim == 3) {
259     PetscCall(DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring));
260   } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
261   if (isBAIJ) {
262     dd->w  = nc;
263     dd->xs = dd->xs*nc;
264     dd->xe = dd->xe*nc;
265     dd->Xs = dd->Xs*nc;
266     dd->Xe = dd->Xe*nc;
267   }
268   PetscFunctionReturn(0);
269 }
270 
271 /* ---------------------------------------------------------------------------------*/
272 
/*
   Creates a coloring for a 2d DMDA by position modulo the stencil period:
   color = k + nc*((i mod col) + col*(j mod col)) with col = 2*s+1, so points
   within a stencil's reach of each other get different colors.  The result is
   cached on the DMDA (dd->localcoloring / dd->ghostedcoloring) and reused.
*/
PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscInt        xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
  PetscInt        ncolors;
  MPI_Comm        comm;
  DMBoundaryType  bx,by;
  DMDAStencilType st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da,&dim,&m,&n,NULL,&M,&N,NULL,&nc,&s,&bx,&by,NULL,&st));
  col  = 2*s + 1;
  PetscCall(DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  /* special case as taught to us by Paul Hovland: a width-1 star stencil
     needs only 5 colors per component rather than col*col = 9 */
  if (st == DMDA_STENCIL_STAR && s == 1) {
    PetscCall(DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring));
  } else {
    if (ctype == IS_COLORING_GLOBAL) {
      if (!dd->localcoloring) {
        PetscCall(PetscMalloc1(nc*nx*ny,&colors));
        ii   = 0;
        /* color the owned points by their (i,j) position modulo the period */
        for (j=ys; j<ys+ny; j++) {
          for (i=xs; i<xs+nx; i++) {
            for (k=0; k<nc; k++) {
              colors[ii++] = k + nc*((i % col) + col*(j % col));
            }
          }
        }
        /* largest assigned color + 1, i.e. nc*col*col distinct colors */
        ncolors = nc + nc*(col-1 + col*(col-1));
        PetscCall(ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring));
      }
      *coloring = dd->localcoloring;
    } else if (ctype == IS_COLORING_LOCAL) {
      if (!dd->ghostedcoloring) {
        PetscCall(PetscMalloc1(nc*gnx*gny,&colors));
        ii   = 0;
        for (j=gys; j<gys+gny; j++) {
          for (i=gxs; i<gxs+gnx; i++) {
            for (k=0; k<nc; k++) {
              /* the complicated stuff is to handle periodic boundaries: ghost
                 indices may be negative or >= m (resp. n); SetInRange wraps them */
              colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
            }
          }
        }
        ncolors = nc + nc*(col - 1 + col*(col-1));
        PetscCall(ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring));
        /* PetscIntView(ncolors,(PetscInt*)colors,0); */

        PetscCall(ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL));
      }
      *coloring = dd->ghostedcoloring;
    } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
  }
  /* extra reference so the caller may destroy its copy while the cached
     coloring on the DMDA stays valid */
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(0);
}
338 
339 /* ---------------------------------------------------------------------------------*/
340 
/*
   Creates a coloring for a 3d DMDA by position modulo the stencil period:
   color = l + nc*((i mod col) + col*(j mod col) + col*col*(k mod col)) with
   col = 2*s+1.  The result is cached on the DMDA (dd->localcoloring /
   dd->ghostedcoloring) and reused on subsequent calls.
*/
PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscInt        xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
  PetscInt        ncolors;
  MPI_Comm        comm;
  DMBoundaryType  bx,by,bz;
  DMDAStencilType st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st));
  col  = 2*s + 1;
  PetscCall(DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc*nx*ny*nz,&colors));
      ii   = 0;
      /* color the owned points by their (i,j,k) position modulo the period */
      for (k=zs; k<zs+nz; k++) {
        for (j=ys; j<ys+ny; j++) {
          for (i=xs; i<xs+nx; i++) {
            for (l=0; l<nc; l++) {
              colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
            }
          }
        }
      }
      /* largest assigned color + 1, i.e. nc*col*col*col distinct colors */
      ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
      PetscCall(ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,PETSC_OWN_POINTER,&dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc*gnx*gny*gnz,&colors));
      ii   = 0;
      for (k=gzs; k<gzs+gnz; k++) {
        for (j=gys; j<gys+gny; j++) {
          for (i=gxs; i<gxs+gnx; i++) {
            for (l=0; l<nc; l++) {
              /* the complicated stuff is to handle periodic boundaries: ghost
                 indices may fall outside 0..m-1 etc.; SetInRange wraps them */
              colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
            }
          }
        }
      }
      ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
      PetscCall(ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
  /* extra reference so the caller may destroy its copy while the cached
     coloring on the DMDA stays valid */
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(0);
}
404 
405 /* ---------------------------------------------------------------------------------*/
406 
/*
   Creates a coloring for a 1d DMDA.  The default scheme is positional:
   color = l + nc*(i mod col) with col = 2*s+1.  When dd->ofillcols has been
   set (via DMDASetBlockFills()/DMDASetBlockFillsSparse()) a tighter scheme is
   used that appears to give extra colors only to the components with
   off-diagonal coupling -- NOTE(review): exact optimality of that scheme is
   not verifiable from this file alone.  Results are cached on the DMDA.
*/
PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscInt        xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
  PetscInt        ncolors;
  MPI_Comm        comm;
  DMBoundaryType  bx;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da,&dim,&m,NULL,NULL,&M,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL));
  col  = 2*s + 1;
  PetscCall(DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL));
  PetscCall(DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc*nx,&colors));
      if (dd->ofillcols) {
        PetscInt tc = 0;
        /* tc = number of components with any off-diagonal coupling */
        for (i=0; i<nc; i++) tc += (PetscInt) (dd->ofillcols[i] > 0);
        i1 = 0;
        for (i=xs; i<xs+nx; i++) {
          for (l=0; l<nc; l++) {
            if (dd->ofillcols[l] && (i % col)) {
              /* coupled component at a nonzero stencil offset: give it one of
                 the extra colors beyond the first nc */
              colors[i1++] =  nc - 1 + tc*((i % col) - 1) + dd->ofillcols[l];
            } else {
              /* uncoupled component (or offset 0): color by component index */
              colors[i1++] = l;
            }
          }
        }
        ncolors = nc + 2*s*tc;
      } else {
        /* default positional scheme */
        i1 = 0;
        for (i=xs; i<xs+nx; i++) {
          for (l=0; l<nc; l++) {
            colors[i1++] = l + nc*(i % col);
          }
        }
        ncolors = nc + nc*(col-1);
      }
      PetscCall(ISColoringCreate(comm,ncolors,nc*nx,colors,PETSC_OWN_POINTER,&dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc*gnx,&colors));
      i1   = 0;
      for (i=gxs; i<gxs+gnx; i++) {
        for (l=0; l<nc; l++) {
          /* the complicated stuff is to handle periodic boundaries: ghost
             indices may fall outside 0..m-1; SetInRange wraps them */
          colors[i1++] = l + nc*(SetInRange(i,m) % col);
        }
      }
      ncolors = nc + nc*(col-1);
      PetscCall(ISColoringCreate(comm,ncolors,nc*gnx,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
  /* extra reference so the caller may destroy its copy while the cached
     coloring on the DMDA stays valid */
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(0);
}
477 
/*
   Coloring for the special 2d case of a width-1 star (5-point) stencil:
   only 5 colors per component are needed, assigned as k + nc*((3*j+i) mod 5).

   Note: unlike the other coloring routines, no ISColoringReference() is taken
   here; the caller DMCreateColoring_DA_2d_MPIAIJ() takes it after this
   returns.
*/
PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscInt        xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
  PetscInt        ncolors;
  MPI_Comm        comm;
  DMBoundaryType  bx,by;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,NULL));
  PetscCall(DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));
  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc*nx*ny,&colors));
      ii   = 0;
      /* (3*j+i) mod 5 separates a point from its four stencil neighbours */
      for (j=ys; j<ys+ny; j++) {
        for (i=xs; i<xs+nx; i++) {
          for (k=0; k<nc; k++) {
            colors[ii++] = k + nc*((3*j+i) % 5);
          }
        }
      }
      ncolors = 5*nc;
      PetscCall(ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc*gnx*gny,&colors));
      ii = 0;
      for (j=gys; j<gys+gny; j++) {
        for (i=gxs; i<gxs+gnx; i++) {
          for (k=0; k<nc; k++) {
            /* SetInRange wraps ghost indices that fall outside the domain
               back into 0..n-1 / 0..m-1 for periodic boundaries */
            colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
          }
        }
      }
      ncolors = 5*nc;
      PetscCall(ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
  PetscFunctionReturn(0);
}
532 
533 /* =========================================================================== */
534 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat);
535 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM,Mat);
536 extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM,Mat);
537 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat);
538 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat);
539 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat);
540 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat);
541 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat);
542 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat);
543 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat);
544 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat);
545 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM,Mat);
546 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM,Mat);
547 extern PetscErrorCode DMCreateMatrix_DA_IS(DM,Mat);
548 
549 /*@C
550    MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix
551 
552    Logically Collective on mat
553 
554    Input Parameters:
555 +  mat - the matrix
556 -  da - the da
557 
558    Level: intermediate
559 
560 @*/
PetscErrorCode MatSetupDM(Mat mat,DM da)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  /* da must specifically be a DMDA, not just any DM */
  PetscValidHeaderSpecificType(da,DM_CLASSID,2,DMDA);
  /* dispatch to the matrix implementation's composed "MatSetupDM_C" method
     if it provides one; silently a no-op for matrix types that do not */
  PetscTryMethod(mat,"MatSetupDM_C",(Mat,DM),(mat,da));
  PetscFunctionReturn(0);
}
569 
570 PetscErrorCode  MatView_MPI_DA(Mat A,PetscViewer viewer)
571 {
572   DM                da;
573   const char        *prefix;
574   Mat               Anatural;
575   AO                ao;
576   PetscInt          rstart,rend,*petsc,i;
577   IS                is;
578   MPI_Comm          comm;
579   PetscViewerFormat format;
580 
581   PetscFunctionBegin;
582   /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
583   PetscCall(PetscViewerGetFormat(viewer,&format));
584   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0);
585 
586   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
587   PetscCall(MatGetDM(A, &da));
588   PetscCheck(da,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
589 
590   PetscCall(DMDAGetAO(da,&ao));
591   PetscCall(MatGetOwnershipRange(A,&rstart,&rend));
592   PetscCall(PetscMalloc1(rend-rstart,&petsc));
593   for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
594   PetscCall(AOApplicationToPetsc(ao,rend-rstart,petsc));
595   PetscCall(ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is));
596 
597   /* call viewer on natural ordering */
598   PetscCall(MatCreateSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural));
599   PetscCall(ISDestroy(&is));
600   PetscCall(PetscObjectGetOptionsPrefix((PetscObject)A,&prefix));
601   PetscCall(PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix));
602   PetscCall(PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name));
603   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
604   PetscCall(MatView(Anatural,viewer));
605   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
606   PetscCall(MatDestroy(&Anatural));
607   PetscFunctionReturn(0);
608 }
609 
610 PetscErrorCode  MatLoad_MPI_DA(Mat A,PetscViewer viewer)
611 {
612   DM             da;
613   Mat            Anatural,Aapp;
614   AO             ao;
615   PetscInt       rstart,rend,*app,i,m,n,M,N;
616   IS             is;
617   MPI_Comm       comm;
618 
619   PetscFunctionBegin;
620   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
621   PetscCall(MatGetDM(A, &da));
622   PetscCheck(da,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
623 
624   /* Load the matrix in natural ordering */
625   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&Anatural));
626   PetscCall(MatSetType(Anatural,((PetscObject)A)->type_name));
627   PetscCall(MatGetSize(A,&M,&N));
628   PetscCall(MatGetLocalSize(A,&m,&n));
629   PetscCall(MatSetSizes(Anatural,m,n,M,N));
630   PetscCall(MatLoad(Anatural,viewer));
631 
632   /* Map natural ordering to application ordering and create IS */
633   PetscCall(DMDAGetAO(da,&ao));
634   PetscCall(MatGetOwnershipRange(Anatural,&rstart,&rend));
635   PetscCall(PetscMalloc1(rend-rstart,&app));
636   for (i=rstart; i<rend; i++) app[i-rstart] = i;
637   PetscCall(AOPetscToApplication(ao,rend-rstart,app));
638   PetscCall(ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is));
639 
640   /* Do permutation and replace header */
641   PetscCall(MatCreateSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp));
642   PetscCall(MatHeaderReplace(A,&Aapp));
643   PetscCall(ISDestroy(&is));
644   PetscCall(MatDestroy(&Anatural));
645   PetscFunctionReturn(0);
646 }
647 
648 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
649 {
650   PetscInt       dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
651   Mat            A;
652   MPI_Comm       comm;
653   MatType        Atype;
654   void           (*aij)(void)=NULL,(*baij)(void)=NULL,(*sbaij)(void)=NULL,(*sell)(void)=NULL,(*is)(void)=NULL;
655   MatType        mtype;
656   PetscMPIInt    size;
657   DM_DA          *dd = (DM_DA*)da->data;
658 
659   PetscFunctionBegin;
660   PetscCall(MatInitializePackage());
661   mtype = da->mattype;
662 
663   /*
664                                   m
665           ------------------------------------------------------
666          |                                                     |
667          |                                                     |
668          |               ----------------------                |
669          |               |                    |                |
670       n  |           ny  |                    |                |
671          |               |                    |                |
672          |               .---------------------                |
673          |             (xs,ys)     nx                          |
674          |            .                                        |
675          |         (gxs,gys)                                   |
676          |                                                     |
677           -----------------------------------------------------
678   */
679 
680   /*
681          nc - number of components per grid point
682          col - number of colors needed in one direction for single component problem
683 
684   */
685   M   = dd->M;
686   N   = dd->N;
687   P   = dd->P;
688   dim = da->dim;
689   dof = dd->w;
690   /* PetscCall(DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); */
691   PetscCall(DMDAGetCorners(da,NULL,NULL,NULL,&nx,&ny,&nz));
692   PetscCall(PetscObjectGetComm((PetscObject)da,&comm));
693   PetscCall(MatCreate(comm,&A));
694   PetscCall(MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P));
695   PetscCall(MatSetType(A,mtype));
696   PetscCall(MatSetFromOptions(A));
697   if (dof*nx*ny*nz < da->bind_below) {
698     PetscCall(MatSetBindingPropagates(A,PETSC_TRUE));
699     PetscCall(MatBindToCPU(A,PETSC_TRUE));
700   }
701   PetscCall(MatSetDM(A,da));
702   if (da->structure_only) {
703     PetscCall(MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE));
704   }
705   PetscCall(MatGetType(A,&Atype));
706   /*
707      We do not provide a getmatrix function in the DMDA operations because
708    the basic DMDA does not know about matrices. We think of DMDA as being more
709    more low-level than matrices. This is kind of cheating but, cause sometimes
710    we think of DMDA has higher level than matrices.
711 
712      We could switch based on Atype (or mtype), but we do not since the
713    specialized setting routines depend only on the particular preallocation
714    details of the matrix, not the type itself.
715   */
716   PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij));
717   if (!aij) {
718     PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij));
719   }
720   if (!aij) {
721     PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij));
722     if (!baij) {
723       PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij));
724     }
725     if (!baij) {
726       PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij));
727       if (!sbaij) {
728         PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij));
729       }
730       if (!sbaij) {
731         PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPISELLSetPreallocation_C",&sell));
732         if (!sell) {
733           PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",&sell));
734         }
735       }
736       if (!sell) {
737         PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatISSetPreallocation_C",&is));
738       }
739     }
740   }
741   if (aij) {
742     if (dim == 1) {
743       if (dd->ofill) {
744         PetscCall(DMCreateMatrix_DA_1d_MPIAIJ_Fill(da,A));
745       } else {
746         DMBoundaryType bx;
747         PetscMPIInt  size;
748         PetscCall(DMDAGetInfo(da,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&bx,NULL,NULL,NULL));
749         PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da),&size));
750         if (size == 1 && bx == DM_BOUNDARY_NONE) {
751           PetscCall(DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da,A));
752         } else {
753           PetscCall(DMCreateMatrix_DA_1d_MPIAIJ(da,A));
754         }
755       }
756     } else if (dim == 2) {
757       if (dd->ofill) {
758         PetscCall(DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A));
759       } else {
760         PetscCall(DMCreateMatrix_DA_2d_MPIAIJ(da,A));
761       }
762     } else if (dim == 3) {
763       if (dd->ofill) {
764         PetscCall(DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A));
765       } else {
766         PetscCall(DMCreateMatrix_DA_3d_MPIAIJ(da,A));
767       }
768     }
769   } else if (baij) {
770     if (dim == 2) {
771       PetscCall(DMCreateMatrix_DA_2d_MPIBAIJ(da,A));
772     } else if (dim == 3) {
773       PetscCall(DMCreateMatrix_DA_3d_MPIBAIJ(da,A));
774     } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
775   } else if (sbaij) {
776     if (dim == 2) {
777       PetscCall(DMCreateMatrix_DA_2d_MPISBAIJ(da,A));
778     } else if (dim == 3) {
779       PetscCall(DMCreateMatrix_DA_3d_MPISBAIJ(da,A));
780     } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
781   } else if (sell) {
782      if (dim == 2) {
783        PetscCall(DMCreateMatrix_DA_2d_MPISELL(da,A));
784      } else if (dim == 3) {
785        PetscCall(DMCreateMatrix_DA_3d_MPISELL(da,A));
786      } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
787   } else if (is) {
788     PetscCall(DMCreateMatrix_DA_IS(da,A));
789   } else {
790     ISLocalToGlobalMapping ltog;
791 
792     PetscCall(MatSetBlockSize(A,dof));
793     PetscCall(MatSetUp(A));
794     PetscCall(DMGetLocalToGlobalMapping(da,&ltog));
795     PetscCall(MatSetLocalToGlobalMapping(A,ltog,ltog));
796   }
797   PetscCall(DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]));
798   PetscCall(MatSetStencil(A,dim,dims,starts,dof));
799   PetscCall(MatSetDM(A,da));
800   PetscCallMPI(MPI_Comm_size(comm,&size));
801   if (size > 1) {
802     /* change viewer to display matrix in natural ordering */
803     PetscCall(MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA));
804     PetscCall(MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA));
805   }
806   *J = A;
807   PetscFunctionReturn(0);
808 }
809 
810 /* ---------------------------------------------------------------------------------*/
811 PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
812 
/*
  DMCreateMatrix_DA_IS - Sets up a MATIS matrix for a DMDA: restricts the DMDA's
  local-to-global map to the nodes actually referenced by the element decomposition,
  attaches it to J, and (unless preallocation is skipped) determines the nonzero
  pattern of the local matrix with a symbolic MATPREALLOCATOR pass.

  dm - the DMDA (must use DMDA_STENCIL_BOX, enforced by DMDAGetElements below)
  J  - the MATIS matrix to configure (block size, l2g mapping, preallocation)
*/
PetscErrorCode DMCreateMatrix_DA_IS(DM dm,Mat J)
{
  DM_DA                  *da = (DM_DA*)dm->data;
  Mat                    lJ,P;   /* local (per-subdomain) matrix of J and temporary preallocator */
  ISLocalToGlobalMapping ltog;
  IS                     is;
  PetscBT                bt;     /* bit array: one bit per block (node), set if a local element touches it */
  const PetscInt         *e_loc,*idx;
  PetscInt               i,nel,nen,nv,dof,*gidx,n,N;

  /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
     We need to filter out the local indices that are not represented through the DMDAGetElements decomposition */
  PetscFunctionBegin;
  dof  = da->w;
  PetscCall(MatSetBlockSize(J,dof));
  PetscCall(DMGetLocalToGlobalMapping(dm,&ltog));

  /* flag local elements indices in local DMDA numbering */
  PetscCall(ISLocalToGlobalMappingGetSize(ltog,&nv)); /* nv counts dofs; nv/dof is the number of (block) nodes */
  PetscCall(PetscBTCreate(nv/dof,&bt));
  PetscCall(DMDAGetElements(dm,&nel,&nen,&e_loc)); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
  for (i=0;i<nel*nen;i++) PetscCall(PetscBTSet(bt,e_loc[i]));

  /* filter out (set to -1) the global indices not used by the local elements */
  PetscCall(PetscMalloc1(nv/dof,&gidx));
  PetscCall(ISLocalToGlobalMappingGetBlockIndices(ltog,&idx));
  PetscCall(PetscArraycpy(gidx,idx,nv/dof));
  PetscCall(ISLocalToGlobalMappingRestoreBlockIndices(ltog,&idx));
  for (i=0;i<nv/dof;i++) if (!PetscBTLookup(bt,i)) gidx[i] = -1;
  PetscCall(PetscBTDestroy(&bt));
  /* build the filtered blocked map and attach it to J; gidx ownership passes to the IS */
  PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nv/dof,gidx,PETSC_OWN_POINTER,&is));
  PetscCall(ISLocalToGlobalMappingCreateIS(is,&ltog));
  PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));
  PetscCall(ISLocalToGlobalMappingDestroy(&ltog));
  PetscCall(ISDestroy(&is));

  /* Preallocation */
  if (dm->prealloc_skip) {
    PetscCall(MatSetUp(J));
  } else {
    /* run a symbolic element loop through a MATPREALLOCATOR matrix shaped like the
       local matrix of J, then transfer the resulting nonzero pattern to lJ */
    PetscCall(MatISGetLocalMat(J,&lJ));
    PetscCall(MatGetLocalToGlobalMapping(lJ,&ltog,NULL));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)lJ),&P));
    PetscCall(MatSetType(P,MATPREALLOCATOR));
    PetscCall(MatSetLocalToGlobalMapping(P,ltog,ltog));
    PetscCall(MatGetSize(lJ,&N,NULL));
    PetscCall(MatGetLocalSize(lJ,&n,NULL));
    PetscCall(MatSetSizes(P,n,n,N,N));
    PetscCall(MatSetBlockSize(P,dof));
    PetscCall(MatSetUp(P));
    for (i=0;i<nel;i++) {
      /* NULL values: only the sparsity pattern (element connectivity) is recorded */
      PetscCall(MatSetValuesBlockedLocal(P,nen,e_loc+i*nen,nen,e_loc+i*nen,NULL,INSERT_VALUES));
    }
    PetscCall(MatPreallocatorPreallocate(P,(PetscBool)!da->prealloc_only,lJ));
    PetscCall(MatISRestoreLocalMat(J,&lJ));
    PetscCall(DMDARestoreElements(dm,&nel,&nen,&e_loc));
    PetscCall(MatDestroy(&P));

    PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
  }
  PetscFunctionReturn(0);
}
876 
/*
  DMCreateMatrix_DA_2d_MPISELL - Preallocates and symbolically assembles a SELL-format
  matrix for a 2d DMDA: one pass over the owned nodes to count diagonal/off-diagonal
  nonzeros per row, then (unless prealloc_only) a second pass inserting explicit zeros
  so the nonzero pattern is fixed.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p;
  PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  PetscCall(DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st));
  col  = 2*s + 1;
  PetscCall(DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  PetscCall(PetscMalloc2(nc,&rows,col*col*nc*nc,&cols));
  PetscCall(DMGetLocalToGlobalMapping(da,&ltog));

  PetscCall(MatSetBlockSize(J,nc));
  /* determine the matrix preallocation information */
  ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);PetscCall(ierr);
  for (i=xs; i<xs+nx; i++) {

    /* stencil extent in x, clipped at physical boundaries unless periodic */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

    for (j=ys; j<ys+ny; j++) {
      slot = i - gxs + gnx*(j - gys); /* node index in the local (ghosted) ordering */

      /* stencil extent in y, clipped at physical boundaries unless periodic */
      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

      cnt = 0;
      for (k=0; k<nc; k++) {
        for (l=lstart; l<lend+1; l++) {
          for (p=pstart; p<pend+1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc*(slot + gnx*l + p);
            }
          }
        }
        rows[k] = k + nc*(slot);
      }
      PetscCall(MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz));
    }
  }
  PetscCall(MatSetBlockSize(J,nc));
  /* only one of these two takes effect, depending on whether J is sequential or parallel */
  PetscCall(MatSeqSELLSetPreallocation(J,0,dnz));
  PetscCall(MatMPISELLSetPreallocation(J,0,dnz,0,onz));
  ierr = MatPreallocateFinalize(dnz,onz);PetscCall(ierr);

  PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* insert explicit zeros (PetscCalloc1 zero-initializes) to lock in the nonzero pattern */
    PetscCall(PetscCalloc1(col*col*nc*nc,&values));
    for (i=xs; i<xs+nx; i++) {

      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

      for (j=ys; j<ys+ny; j++) {
        slot = i - gxs + gnx*(j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

        cnt = 0;
        for (k=0; k<nc; k++) {
          for (l=lstart; l<lend+1; l++) {
            for (p=pstart; p<pend+1; p++) {
              if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
                cols[cnt++] = k + nc*(slot + gnx*l + p);
              }
            }
          }
          rows[k] = k + nc*(slot);
        }
        PetscCall(MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J,PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J,PETSC_FALSE));
    PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows,cols));
  PetscFunctionReturn(0);
}
981 
982 PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da,Mat J)
983 {
984   PetscErrorCode         ierr;
985   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
986   PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
987   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
988   MPI_Comm               comm;
989   PetscScalar            *values;
990   DMBoundaryType         bx,by,bz;
991   ISLocalToGlobalMapping ltog;
992   DMDAStencilType        st;
993 
994   PetscFunctionBegin;
995   /*
996          nc - number of components per grid point
997          col - number of colors needed in one direction for single component problem
998 
999   */
1000   PetscCall(DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st));
1001   col  = 2*s + 1;
1002   PetscCall(DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz));
1003   PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz));
1004   PetscCall(PetscObjectGetComm((PetscObject)da,&comm));
1005 
1006   PetscCall(PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols));
1007   PetscCall(DMGetLocalToGlobalMapping(da,&ltog));
1008 
1009   PetscCall(MatSetBlockSize(J,nc));
1010   /* determine the matrix preallocation information */
1011   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);PetscCall(ierr);
1012   for (i=xs; i<xs+nx; i++) {
1013     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1014     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1015     for (j=ys; j<ys+ny; j++) {
1016       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1017       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1018       for (k=zs; k<zs+nz; k++) {
1019         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1020         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1021 
1022         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1023 
1024         cnt = 0;
1025         for (l=0; l<nc; l++) {
1026           for (ii=istart; ii<iend+1; ii++) {
1027             for (jj=jstart; jj<jend+1; jj++) {
1028               for (kk=kstart; kk<kend+1; kk++) {
1029                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1030                   cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1031                 }
1032               }
1033             }
1034           }
1035           rows[l] = l + nc*(slot);
1036         }
1037         PetscCall(MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz));
1038       }
1039     }
1040   }
1041   PetscCall(MatSetBlockSize(J,nc));
1042   PetscCall(MatSeqSELLSetPreallocation(J,0,dnz));
1043   PetscCall(MatMPISELLSetPreallocation(J,0,dnz,0,onz));
1044   ierr = MatPreallocateFinalize(dnz,onz);PetscCall(ierr);
1045   PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));
1046 
1047   /*
1048     For each node in the grid: we get the neighbors in the local (on processor ordering
1049     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1050     PETSc ordering.
1051   */
1052   if (!da->prealloc_only) {
1053     PetscCall(PetscCalloc1(col*col*col*nc*nc*nc,&values));
1054     for (i=xs; i<xs+nx; i++) {
1055       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1056       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1057       for (j=ys; j<ys+ny; j++) {
1058         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1059         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1060         for (k=zs; k<zs+nz; k++) {
1061           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1062           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1063 
1064           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1065 
1066           cnt = 0;
1067           for (l=0; l<nc; l++) {
1068             for (ii=istart; ii<iend+1; ii++) {
1069               for (jj=jstart; jj<jend+1; jj++) {
1070                 for (kk=kstart; kk<kend+1; kk++) {
1071                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1072                     cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1073                   }
1074                 }
1075               }
1076             }
1077             rows[l] = l + nc*(slot);
1078           }
1079           PetscCall(MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES));
1080         }
1081       }
1082     }
1083     PetscCall(PetscFree(values));
1084     /* do not copy values to GPU since they are all zero and not yet needed there */
1085     PetscCall(MatBindToCPU(J,PETSC_TRUE));
1086     PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
1087     PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
1088     PetscCall(MatBindToCPU(J,PETSC_FALSE));
1089     PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
1090   }
1091   PetscCall(PetscFree2(rows,cols));
1092   PetscFunctionReturn(0);
1093 }
1094 
/*
  DMCreateMatrix_DA_2d_MPIAIJ - Preallocates and symbolically assembles an AIJ matrix
  for a 2d DMDA: counts diagonal/off-diagonal nonzeros per row, preallocates, then
  (unless prealloc_only) inserts the sparsity pattern (NULL values) so new nonzero
  locations can be flagged as errors.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,M,N;
  PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
  MPI_Comm               comm;
  DMBoundaryType         bx,by;
  ISLocalToGlobalMapping ltog,mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE,alreadyboundtocpu = PETSC_TRUE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  PetscCall(DMDAGetInfo(da,&dim,&m,&n,NULL,&M,&N,NULL,&nc,&s,&bx,&by,NULL,&st));
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) {
    /* without wrap-around, insertions below produce fully sorted column indices */
    PetscCall(MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE));
  }
  col  = 2*s + 1;
  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
  PetscCall(DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  PetscCall(PetscMalloc2(nc,&rows,col*col*nc*nc,&cols));
  PetscCall(DMGetLocalToGlobalMapping(da,&ltog));

  PetscCall(MatSetBlockSize(J,nc));
  /* determine the matrix preallocation information */
  ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);PetscCall(ierr);
  for (i=xs; i<xs+nx; i++) {
    /* stencil extent in x, clipped at physical boundaries unless periodic */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

    for (j=ys; j<ys+ny; j++) {
      slot = i - gxs + gnx*(j - gys); /* node index in the local (ghosted) ordering */

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

      cnt = 0;
      for (k=0; k<nc; k++) {
        for (l=lstart; l<lend+1; l++) {
          for (p=pstart; p<pend+1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc*(slot + gnx*l + p);
            }
          }
        }
        rows[k] = k + nc*(slot);
      }
      if (removedups) {
        PetscCall(MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz));
      } else {
        PetscCall(MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz));
      }
    }
  }
  PetscCall(MatSetBlockSize(J,nc));
  /* only one of these two takes effect, depending on whether J is sequential or parallel */
  PetscCall(MatSeqAIJSetPreallocation(J,0,dnz));
  PetscCall(MatMPIAIJSetPreallocation(J,0,dnz,0,onz));
  ierr = MatPreallocateFinalize(dnz,onz);PetscCall(ierr);
  PetscCall(MatGetLocalToGlobalMapping(J,&mltog,NULL));
  if (!mltog) {
    PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));
  }

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i=xs; i<xs+nx; i++) {

      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

      for (j=ys; j<ys+ny; j++) {
        slot = i - gxs + gnx*(j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

        cnt = 0;
        /* here the k (component) loop is innermost so the nc dofs of each stencil node are consecutive */
        for (l=lstart; l<lend+1; l++) {
          for (p=pstart; p<pend+1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = nc*(slot + gnx*l + p);
              for (k=1; k<nc; k++) {
                cols[cnt] = 1 + cols[cnt-1];cnt++;
              }
            }
          }
        }
        for (k=0; k<nc; k++) rows[k] = k + nc*(slot);
        /* NULL values: only the nonzero pattern is inserted */
        PetscCall(MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES));
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBoundToCPU(J,&alreadyboundtocpu));
    PetscCall(MatBindToCPU(J,PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    if (!alreadyboundtocpu) PetscCall(MatBindToCPU(J,PETSC_FALSE)); /* restore the caller's binding state */
    PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) {
      PetscCall(MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE));
    }
  }
  PetscCall(PetscFree2(rows,cols));
  PetscFunctionReturn(0);
}
1216 
1217 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)
1218 {
1219   PetscErrorCode         ierr;
1220   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1221   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,maxcnt = 0,l,p,M,N;
1222   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
1223   DM_DA                  *dd = (DM_DA*)da->data;
1224   PetscInt               ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
1225   MPI_Comm               comm;
1226   DMBoundaryType         bx,by;
1227   ISLocalToGlobalMapping ltog;
1228   DMDAStencilType        st;
1229   PetscBool              removedups = PETSC_FALSE;
1230 
1231   PetscFunctionBegin;
1232   /*
1233          nc - number of components per grid point
1234          col - number of colors needed in one direction for single component problem
1235 
1236   */
1237   PetscCall(DMDAGetInfo(da,&dim,&m,&n,NULL,&M,&N,NULL,&nc,&s,&bx,&by,NULL,&st));
1238   col  = 2*s + 1;
1239   /*
1240        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1241        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1242   */
1243   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1244   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1245   PetscCall(DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL));
1246   PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL));
1247   PetscCall(PetscObjectGetComm((PetscObject)da,&comm));
1248 
1249   PetscCall(PetscMalloc1(col*col*nc,&cols));
1250   PetscCall(DMGetLocalToGlobalMapping(da,&ltog));
1251 
1252   PetscCall(MatSetBlockSize(J,nc));
1253   /* determine the matrix preallocation information */
1254   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);PetscCall(ierr);
1255   for (i=xs; i<xs+nx; i++) {
1256 
1257     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1258     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1259 
1260     for (j=ys; j<ys+ny; j++) {
1261       slot = i - gxs + gnx*(j - gys);
1262 
1263       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1264       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1265 
1266       for (k=0; k<nc; k++) {
1267         cnt = 0;
1268         for (l=lstart; l<lend+1; l++) {
1269           for (p=pstart; p<pend+1; p++) {
1270             if (l || p) {
1271               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
1272                 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
1273               }
1274             } else {
1275               if (dfill) {
1276                 for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1277               } else {
1278                 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1279               }
1280             }
1281           }
1282         }
1283         row    = k + nc*(slot);
1284         maxcnt = PetscMax(maxcnt,cnt);
1285         if (removedups) {
1286           PetscCall(MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz));
1287         } else {
1288           PetscCall(MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz));
1289         }
1290       }
1291     }
1292   }
1293   PetscCall(MatSeqAIJSetPreallocation(J,0,dnz));
1294   PetscCall(MatMPIAIJSetPreallocation(J,0,dnz,0,onz));
1295   ierr = MatPreallocateFinalize(dnz,onz);PetscCall(ierr);
1296   PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));
1297 
1298   /*
1299     For each node in the grid: we get the neighbors in the local (on processor ordering
1300     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1301     PETSc ordering.
1302   */
1303   if (!da->prealloc_only) {
1304     for (i=xs; i<xs+nx; i++) {
1305 
1306       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1307       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1308 
1309       for (j=ys; j<ys+ny; j++) {
1310         slot = i - gxs + gnx*(j - gys);
1311 
1312         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1313         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1314 
1315         for (k=0; k<nc; k++) {
1316           cnt = 0;
1317           for (l=lstart; l<lend+1; l++) {
1318             for (p=pstart; p<pend+1; p++) {
1319               if (l || p) {
1320                 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
1321                   for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
1322                 }
1323               } else {
1324                 if (dfill) {
1325                   for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1326                 } else {
1327                   for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1328                 }
1329               }
1330             }
1331           }
1332           row  = k + nc*(slot);
1333           PetscCall(MatSetValuesLocal(J,1,&row,cnt,cols,NULL,INSERT_VALUES));
1334         }
1335       }
1336     }
1337     /* do not copy values to GPU since they are all zero and not yet needed there */
1338     PetscCall(MatBindToCPU(J,PETSC_TRUE));
1339     PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
1340     PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
1341     PetscCall(MatBindToCPU(J,PETSC_FALSE));
1342     PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
1343   }
1344   PetscCall(PetscFree(cols));
1345   PetscFunctionReturn(0);
1346 }
1347 
1348 /* ---------------------------------------------------------------------------------*/
1349 
/*
  DMCreateMatrix_DA_3d_MPIAIJ - Preallocates and symbolically assembles an AIJ matrix
  for a 3d DMDA: counts diagonal/off-diagonal nonzeros per row, preallocates, then
  (unless prealloc_only) inserts the sparsity pattern (NULL values) so new nonzero
  locations can be flagged as errors.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
  PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
  MPI_Comm               comm;
  DMBoundaryType         bx,by,bz;
  ISLocalToGlobalMapping ltog,mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  PetscCall(DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st));
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) {
    /* without wrap-around, insertions below produce fully sorted column indices */
    PetscCall(MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE));
  }
  col  = 2*s + 1;

  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
  if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;

  PetscCall(DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  PetscCall(PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols));
  PetscCall(DMGetLocalToGlobalMapping(da,&ltog));

  PetscCall(MatSetBlockSize(J,nc));
  /* determine the matrix preallocation information */
  ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);PetscCall(ierr);
  for (i=xs; i<xs+nx; i++) {
    /* stencil extents in each direction, clipped at physical boundaries unless periodic */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
      for (k=zs; k<zs+nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

        slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); /* node index in the local (ghosted) ordering */

        cnt = 0;
        for (l=0; l<nc; l++) {
          for (ii=istart; ii<iend+1; ii++) {
            for (jj=jstart; jj<jend+1; jj++) {
              for (kk=kstart; kk<kend+1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                  cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                }
              }
            }
          }
          rows[l] = l + nc*(slot);
        }
        if (removedups) {
          PetscCall(MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz));
        } else {
          PetscCall(MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz));
        }
      }
    }
  }
  PetscCall(MatSetBlockSize(J,nc));
  /* only one of these two takes effect, depending on whether J is sequential or parallel */
  PetscCall(MatSeqAIJSetPreallocation(J,0,dnz));
  PetscCall(MatMPIAIJSetPreallocation(J,0,dnz,0,onz));
  ierr = MatPreallocateFinalize(dnz,onz);PetscCall(ierr);
  PetscCall(MatGetLocalToGlobalMapping(J,&mltog,NULL));
  if (!mltog) {
    PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));
  }

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
        for (k=zs; k<zs+nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

          slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

          cnt = 0;
          /* here the loops are nested kk (outermost) / jj / ii with the component index
             innermost so the generated column indices are increasing (see MAT_SORTED_FULL above) */
          for (kk=kstart; kk<kend+1; kk++) {
            for (jj=jstart; jj<jend+1; jj++) {
              for (ii=istart; ii<iend+1; ii++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                  cols[cnt++] = nc*(slot + ii + gnx*jj + gnx*gny*kk);
                    for (l=1; l<nc; l++) {
                      cols[cnt] = 1 + cols[cnt-1];cnt++;
                  }
                }
              }
            }
          }
          rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
          /* NULL values: only the nonzero pattern is inserted */
          PetscCall(MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES));
        }
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J,PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) {
      PetscCall(MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE));
    }
    PetscCall(MatBindToCPU(J,PETSC_FALSE));
    PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows,cols));
  PetscFunctionReturn(0);
}
1483 
1484 /* ---------------------------------------------------------------------------------*/
1485 
/*
   Preallocates and fills (with explicit zeros) an AIJ matrix for a 1d DMDA using
   the user-provided per-grid-point fill patterns dd->dfill (coupling between
   components at the same grid point) and dd->ofill (coupling with components of
   neighboring grid points); see DMDASetBlockFills().

   NOTE(review): the three counting loops below partition local rows as
   [0,s), [s,nx-s), [nx-s,nx); this assumes nx >= 2*s on every process --
   confirm that DMDA setup rejects thinner local domains.
*/
PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J)
{
  DM_DA                  *dd = (DM_DA*)da->data;
  PetscInt               xs,nx,i,j,gxs,gnx,row,k,l;
  PetscInt               m,dim,s,*cols = NULL,nc,cnt,maxcnt = 0,*ocols;
  PetscInt               *ofill = dd->ofill,*dfill = dd->dfill;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog;
  PetscMPIInt            rank,size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)da),&rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da),&size));

  /*
         nc - number of components per grid point
         s  - stencil width; only s <= 1 is supported here (checked below)
  */
  PetscCall(DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL));
  PetscCheckFalse(s > 1,PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Matrix creation for 1d not implemented correctly for stencil width larger than 1");
  PetscCall(DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL));
  PetscCall(DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL));

  PetscCall(MatSetBlockSize(J,nc));
  /* cols[r] / ocols[r] = number of diagonal-portion / off-process nonzeros in local row r */
  PetscCall(PetscCalloc2(nx*nc,&cols,nx*nc,&ocols));

  /*
        note should be smaller for first and last process with no periodic
        does not handle dfill
  */
  cnt = 0;
  /* count nonzeros for the first s grid points, which may couple with the process to the left */
  for (i=0; i<s; i++) {
    for (j=0; j<nc; j++) {
      /* ofill[j+1]-ofill[j] = number of neighbor components that component j couples to */
      ocols[cnt] = ((rank == 0) ? 0 : (s - i)*(ofill[j+1] - ofill[j]));
      cols[cnt]  = dfill[j+1] - dfill[j] + (s + i)*(ofill[j+1] - ofill[j]);
      if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        /* periodic wrap: the left neighbor lives on the last process (off-process)
           unless there is only one process, in which case it is on-process */
        if (size > 1) ocols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
        else cols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
      }
      maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
      cnt++;
    }
  }
  /* interior grid points: the whole stencil is on-process */
  for (i=s; i<nx-s; i++) {
    for (j=0; j<nc; j++) {
      cols[cnt] = dfill[j+1] - dfill[j] + 2*s*(ofill[j+1] - ofill[j]);
      maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
      cnt++;
    }
  }
  /* coupling with process to the right */
  for (i=nx-s; i<nx; i++) {
    for (j=0; j<nc; j++) {
      ocols[cnt] = ((rank == (size-1)) ? 0 : (i - nx + s + 1)*(ofill[j+1] - ofill[j]));
      cols[cnt]  = dfill[j+1] - dfill[j] + (s + nx - i - 1)*(ofill[j+1] - ofill[j]);
      if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        /* periodic wrap: the right neighbor is on the first process (or on-process if size == 1) */
        if (size > 1) ocols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
        else cols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
      }
      maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
      cnt++;
    }
  }

  PetscCall(MatSeqAIJSetPreallocation(J,0,cols));
  PetscCall(MatMPIAIJSetPreallocation(J,0,cols,0,ocols));
  PetscCall(PetscFree2(cols,ocols));

  PetscCall(DMGetLocalToGlobalMapping(da,&ltog));
  PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));

  /*
    For each grid point, compute the global column indices of its stencil
    (respecting dfill/ofill) and insert explicit zeros with MatSetValues();
    note that, unlike the other creation routines in this file, this one works
    directly with global indices rather than MatSetValuesLocal().
  */
  if (!da->prealloc_only) {
    PetscCall(PetscMalloc1(maxcnt,&cols));
    row = xs*nc;   /* global index of the first local matrix row */
    /* coupling with process to the left */
    for (i=xs; i<xs+s; i++) {
      for (j=0; j<nc; j++) {
        cnt = 0;
        if (rank) {
          /* left neighbors owned by the previous process */
          for (l=0; l<s; l++) {
            for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
          }
        }
        if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
          /* periodic wrap to the far end of the domain */
          for (l=0; l<s; l++) {
            for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (m + i - s - l)*nc + ofill[k];
          }
        }
        if (dfill) {
          /* same-point coupling restricted to the sparse dfill pattern */
          for (k=dfill[j]; k<dfill[j+1]; k++) {
            cols[cnt++] = i*nc + dfill[k];
          }
        } else {
          /* no dfill given: couple with every component at this grid point */
          for (k=0; k<nc; k++) {
            cols[cnt++] = i*nc + k;
          }
        }
        for (l=0; l<s; l++) {
          for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
        }
        PetscCall(MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES));
        row++;
      }
    }
    /* interior grid points: left neighbors, same point, right neighbors */
    for (i=xs+s; i<xs+nx-s; i++) {
      for (j=0; j<nc; j++) {
        cnt = 0;
        for (l=0; l<s; l++) {
          for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
        }
        if (dfill) {
          for (k=dfill[j]; k<dfill[j+1]; k++) {
            cols[cnt++] = i*nc + dfill[k];
          }
        } else {
          for (k=0; k<nc; k++) {
            cols[cnt++] = i*nc + k;
          }
        }
        for (l=0; l<s; l++) {
          for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
        }
        PetscCall(MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES));
        row++;
      }
    }
    /* coupling with process to the right */
    for (i=xs+nx-s; i<xs+nx; i++) {
      for (j=0; j<nc; j++) {
        cnt = 0;
        for (l=0; l<s; l++) {
          for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
        }
        if (dfill) {
          for (k=dfill[j]; k<dfill[j+1]; k++) {
            cols[cnt++] = i*nc + dfill[k];
          }
        } else {
          for (k=0; k<nc; k++) {
            cols[cnt++] = i*nc + k;
          }
        }
        if (rank < size-1) {
          /* right neighbors owned by the next process */
          for (l=0; l<s; l++) {
            for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
          }
        }
        if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
          /* periodic wrap back to the start of the domain */
          for (l=0; l<s; l++) {
            for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s - l - m + 2)*nc + ofill[k];
          }
        }
        PetscCall(MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES));
        row++;
      }
    }
    PetscCall(PetscFree(cols));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J,PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J,PETSC_FALSE));
    PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  PetscFunctionReturn(0);
}
1658 
1659 /* ---------------------------------------------------------------------------------*/
1660 
1661 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J)
1662 {
1663   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1664   PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
1665   PetscInt               istart,iend;
1666   DMBoundaryType         bx;
1667   ISLocalToGlobalMapping ltog,mltog;
1668 
1669   PetscFunctionBegin;
1670   /*
1671          nc - number of components per grid point
1672          col - number of colors needed in one direction for single component problem
1673 
1674   */
1675   PetscCall(DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL));
1676   if (bx == DM_BOUNDARY_NONE) {
1677     PetscCall(MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE));
1678   }
1679   col  = 2*s + 1;
1680 
1681   PetscCall(DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL));
1682   PetscCall(DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL));
1683 
1684   PetscCall(MatSetBlockSize(J,nc));
1685   PetscCall(MatSeqAIJSetPreallocation(J,col*nc,NULL));
1686   PetscCall(MatMPIAIJSetPreallocation(J,col*nc,NULL,col*nc,NULL));
1687 
1688   PetscCall(DMGetLocalToGlobalMapping(da,&ltog));
1689   PetscCall(MatGetLocalToGlobalMapping(J,&mltog,NULL));
1690   if (!mltog) {
1691     PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));
1692   }
1693 
1694   /*
1695     For each node in the grid: we get the neighbors in the local (on processor ordering
1696     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1697     PETSc ordering.
1698   */
1699   if (!da->prealloc_only) {
1700     PetscCall(PetscMalloc2(nc,&rows,col*nc*nc,&cols));
1701     for (i=xs; i<xs+nx; i++) {
1702       istart = PetscMax(-s,gxs - i);
1703       iend   = PetscMin(s,gxs + gnx - i - 1);
1704       slot   = i - gxs;
1705 
1706       cnt = 0;
1707       for (i1=istart; i1<iend+1; i1++) {
1708         cols[cnt++] = nc*(slot + i1);
1709         for (l=1; l<nc; l++) {
1710           cols[cnt] = 1 + cols[cnt-1];cnt++;
1711         }
1712       }
1713       rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
1714       PetscCall(MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES));
1715     }
1716     /* do not copy values to GPU since they are all zero and not yet needed there */
1717     PetscCall(MatBindToCPU(J,PETSC_TRUE));
1718     PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
1719     PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
1720     if (bx == DM_BOUNDARY_NONE) {
1721       PetscCall(MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE));
1722     }
1723     PetscCall(MatBindToCPU(J,PETSC_FALSE));
1724     PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
1725     PetscCall(PetscFree2(rows,cols));
1726   }
1727   PetscFunctionReturn(0);
1728 }
1729 
1730 /* ---------------------------------------------------------------------------------*/
1731 
1732 PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da,Mat J)
1733 {
1734   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1735   PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
1736   PetscInt               istart,iend;
1737   DMBoundaryType         bx;
1738   ISLocalToGlobalMapping ltog,mltog;
1739 
1740   PetscFunctionBegin;
1741   /*
1742          nc - number of components per grid point
1743          col - number of colors needed in one direction for single component problem
1744   */
1745   PetscCall(DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL));
1746   col  = 2*s + 1;
1747 
1748   PetscCall(DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL));
1749   PetscCall(DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL));
1750 
1751   PetscCall(MatSetBlockSize(J,nc));
1752   PetscCall(MatSeqAIJSetTotalPreallocation(J,nx*nc*col*nc));
1753 
1754   PetscCall(DMGetLocalToGlobalMapping(da,&ltog));
1755   PetscCall(MatGetLocalToGlobalMapping(J,&mltog,NULL));
1756   if (!mltog) {
1757     PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));
1758   }
1759 
1760   /*
1761     For each node in the grid: we get the neighbors in the local (on processor ordering
1762     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1763     PETSc ordering.
1764   */
1765   if (!da->prealloc_only) {
1766     PetscCall(PetscMalloc2(nc,&rows,col*nc*nc,&cols));
1767     for (i=xs; i<xs+nx; i++) {
1768       istart = PetscMax(-s,gxs - i);
1769       iend   = PetscMin(s,gxs + gnx - i - 1);
1770       slot   = i - gxs;
1771 
1772       cnt = 0;
1773       for (i1=istart; i1<iend+1; i1++) {
1774         cols[cnt++] = nc*(slot + i1);
1775         for (l=1; l<nc; l++) {
1776           cols[cnt] = 1 + cols[cnt-1];cnt++;
1777         }
1778       }
1779       rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
1780       PetscCall(MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES));
1781     }
1782     /* do not copy values to GPU since they are all zero and not yet needed there */
1783     PetscCall(MatBindToCPU(J,PETSC_TRUE));
1784     PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
1785     PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
1786     if (bx == DM_BOUNDARY_NONE) {
1787       PetscCall(MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE));
1788     }
1789     PetscCall(MatBindToCPU(J,PETSC_FALSE));
1790     PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
1791     PetscCall(PetscFree2(rows,cols));
1792   }
1793   PetscCall(MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE));
1794   PetscFunctionReturn(0);
1795 }
1796 
/*
   Creates the nonzero structure of a blocked (BAIJ, block size nc) matrix for a
   2d DMDA: first counts block columns per block row to preallocate, then
   inserts explicit zero blocks so the nonzero pattern is fixed.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
  PetscInt               istart,iend,jstart,jend,ii,jj;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st));
  col  = 2*s + 1;

  PetscCall(DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  PetscCall(PetscMalloc1(col*col*nc*nc,&cols));

  PetscCall(DMGetLocalToGlobalMapping(da,&ltog));

  /* determine the matrix preallocation information */
  ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);PetscCall(ierr);
  for (i=xs; i<xs+nx; i++) {
    /* stencil extent in x, clipped at the physical boundary unless periodic */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
      slot   = i - gxs + gnx*(j - gys);   /* local (ghosted) index of this grid point */

      /* Find block columns in block row */
      cnt = 0;
      for (ii=istart; ii<iend+1; ii++) {
        for (jj=jstart; jj<jend+1; jj++) {
          if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
            cols[cnt++] = slot + ii + gnx*jj;
          }
        }
      }
      PetscCall(MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz));
    }
  }
  PetscCall(MatSeqBAIJSetPreallocation(J,nc,0,dnz));
  PetscCall(MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz));
  ierr = MatPreallocateFinalize(dnz,onz);PetscCall(ierr);

  PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesBlockedLocal() maps those indices
    to the global PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* zero-filled buffer: one full nc x nc block per stencil entry */
    PetscCall(PetscCalloc1(col*col*nc*nc,&values));
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
        slot = i - gxs + gnx*(j - gys);
        cnt  = 0;
        for (ii=istart; ii<iend+1; ii++) {
          for (jj=jstart; jj<jend+1; jj++) {
            if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
              cols[cnt++] = slot + ii + gnx*jj;
            }
          }
        }
        PetscCall(MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J,PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J,PETSC_FALSE));
    PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(0);
}
1889 
/*
   3d analogue of DMCreateMatrix_DA_2d_MPIBAIJ: preallocates a BAIJ matrix
   (block size nc) for a 3d DMDA and inserts explicit zero blocks to fix the
   nonzero pattern.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
  PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by,bz;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st));
  col  = 2*s + 1;

  PetscCall(DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  /* block-column workspace: at most col^3 stencil entries per block row */
  PetscCall(PetscMalloc1(col*col*col,&cols));

  PetscCall(DMGetLocalToGlobalMapping(da,&ltog));

  /* determine the matrix preallocation information */
  ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);PetscCall(ierr);
  for (i=xs; i<xs+nx; i++) {
    /* stencil extent in each direction, clipped at physical boundaries unless periodic */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
      for (k=zs; k<zs+nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

        slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);   /* local (ghosted) index */

        /* Find block columns in block row */
        cnt = 0;
        for (ii=istart; ii<iend+1; ii++) {
          for (jj=jstart; jj<jend+1; jj++) {
            for (kk=kstart; kk<kend+1; kk++) {
              if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
              }
            }
          }
        }
        PetscCall(MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz));
      }
    }
  }
  PetscCall(MatSeqBAIJSetPreallocation(J,nc,0,dnz));
  PetscCall(MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz));
  ierr = MatPreallocateFinalize(dnz,onz);PetscCall(ierr);

  PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesBlockedLocal() maps those indices
    to the global PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* zero-filled buffer: one full nc x nc block per stencil entry */
    PetscCall(PetscCalloc1(col*col*col*nc*nc,&values));
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
        for (k=zs; k<zs+nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

          slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

          cnt = 0;
          for (ii=istart; ii<iend+1; ii++) {
            for (jj=jstart; jj<jend+1; jj++) {
              for (kk=kstart; kk<kend+1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                  cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
                }
              }
            }
          }
          PetscCall(MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J,PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J,PETSC_FALSE));
    PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(0);
}
1998 
1999 /*
2000   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
2001   identify in the local ordering with periodic domain.
2002 */
2003 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[])
2004 {
2005   PetscInt       i,n;
2006 
2007   PetscFunctionBegin;
2008   PetscCall(ISLocalToGlobalMappingApplyBlock(ltog,1,row,row));
2009   PetscCall(ISLocalToGlobalMappingApplyBlock(ltog,*cnt,col,col));
2010   for (i=0,n=0; i<*cnt; i++) {
2011     if (col[i] >= *row) col[n++] = col[i];
2012   }
2013   *cnt = n;
2014   PetscFunctionReturn(0);
2015 }
2016 
/*
   Creates the nonzero structure of a symmetric blocked (SBAIJ, block size nc)
   matrix for a 2d DMDA; only the upper triangle (including the diagonal) is
   preallocated and filled, via L2GFilterUpperTriangular().
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
  PetscInt               istart,iend,jstart,jend,ii,jj;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st));
  col  = 2*s + 1;

  PetscCall(DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  PetscCall(PetscMalloc1(col*col*nc*nc,&cols));

  PetscCall(DMGetLocalToGlobalMapping(da,&ltog));

  /* determine the matrix preallocation information */
  ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);PetscCall(ierr);
  for (i=xs; i<xs+nx; i++) {
    /* stencil extent, clipped at the physical boundary unless periodic */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
      slot   = i - gxs + gnx*(j - gys);   /* local (ghosted) index of this grid point */

      /* Find block columns in block row */
      cnt = 0;
      for (ii=istart; ii<iend+1; ii++) {
        for (jj=jstart; jj<jend+1; jj++) {
          if (st == DMDA_STENCIL_BOX || !ii || !jj) {
            cols[cnt++] = slot + ii + gnx*jj;
          }
        }
      }
      /* map to global indices and drop the strictly lower-triangular columns */
      PetscCall(L2GFilterUpperTriangular(ltog,&slot,&cnt,cols));
      PetscCall(MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz));
    }
  }
  PetscCall(MatSeqSBAIJSetPreallocation(J,nc,0,dnz));
  PetscCall(MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz));
  ierr = MatPreallocateFinalize(dnz,onz);PetscCall(ierr);

  PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));

  /*
    For each node in the grid: compute the stencil columns, filter to the upper
    triangle in the global ordering, and insert explicit zero blocks with
    MatSetValuesBlocked() (global indices, since the filter already mapped them).
  */
  if (!da->prealloc_only) {
    PetscCall(PetscCalloc1(col*col*nc*nc,&values));
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
        slot   = i - gxs + gnx*(j - gys);

        /* Find block columns in block row */
        cnt = 0;
        for (ii=istart; ii<iend+1; ii++) {
          for (jj=jstart; jj<jend+1; jj++) {
            if (st == DMDA_STENCIL_BOX || !ii || !jj) {
              cols[cnt++] = slot + ii + gnx*jj;
            }
          }
        }
        PetscCall(L2GFilterUpperTriangular(ltog,&slot,&cnt,cols));
        PetscCall(MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J,PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J,PETSC_FALSE));
    PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(0);
}
2113 
/*
   3d analogue of DMCreateMatrix_DA_2d_MPISBAIJ: preallocates and fills only the
   upper triangle (including the diagonal) of a symmetric blocked SBAIJ matrix
   for a 3d DMDA.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
  PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by,bz;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st));
  col  = 2*s + 1;

  PetscCall(DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  /* create the matrix */
  PetscCall(PetscMalloc1(col*col*col,&cols));

  PetscCall(DMGetLocalToGlobalMapping(da,&ltog));

  /* determine the matrix preallocation information */
  ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);PetscCall(ierr);
  for (i=xs; i<xs+nx; i++) {
    /* stencil extent in each direction, clipped at physical boundaries unless periodic */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
      for (k=zs; k<zs+nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

        slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);   /* local (ghosted) index */

        /* Find block columns in block row */
        cnt = 0;
        for (ii=istart; ii<iend+1; ii++) {
          for (jj=jstart; jj<jend+1; jj++) {
            for (kk=kstart; kk<kend+1; kk++) {
              if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
                cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
              }
            }
          }
        }
        /* map to global indices and drop the strictly lower-triangular columns */
        PetscCall(L2GFilterUpperTriangular(ltog,&slot,&cnt,cols));
        PetscCall(MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz));
      }
    }
  }
  PetscCall(MatSeqSBAIJSetPreallocation(J,nc,0,dnz));
  PetscCall(MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz));
  ierr = MatPreallocateFinalize(dnz,onz);PetscCall(ierr);

  PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));

  /*
    For each node in the grid: compute the stencil columns, filter to the upper
    triangle in the global ordering, and insert explicit zero blocks with
    MatSetValuesBlocked() (global indices, since the filter already mapped them).
  */
  if (!da->prealloc_only) {
    PetscCall(PetscCalloc1(col*col*col*nc*nc,&values));
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
        for (k=zs; k<zs+nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

          slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

          cnt = 0;
          for (ii=istart; ii<iend+1; ii++) {
            for (jj=jstart; jj<jend+1; jj++) {
              for (kk=kstart; kk<kend+1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
                  cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
                }
              }
            }
          }
          PetscCall(L2GFilterUpperTriangular(ltog,&slot,&cnt,cols));
          PetscCall(MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J,PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J,PETSC_FALSE));
    PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(0);
}
2224 
2225 /* ---------------------------------------------------------------------------------*/
2226 
/*
   DMCreateMatrix_DA_3d_MPIAIJ_Fill - Builds the nonzero structure of an AIJ matrix for a
   3d DMDA when the user has supplied block-fill information (dd->dfill / dd->ofill, set
   via DMDASetBlockFills()) describing which of the nc x nc component couplings are
   nonzero within a grid point (dfill) and between neighboring grid points (ofill).

   The work is done in two passes over the identical stencil loops:
     1) count nonzeros per local row to preallocate the matrix (dnz/onz), and
     2) (unless da->prealloc_only) insert explicit zeros into every preallocated
        location so the nonzero pattern is locked in.
   The two passes MUST generate exactly the same column sets per row.

   Input parameters:
+  da - the DMDA providing grid sizes, stencil, boundary types, and fill patterns
-  J  - the matrix whose preallocation and nonzero structure are set (collective)
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt, maxcnt = 0,l,p,*dnz,*onz;
  PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
  DM_DA                  *dd = (DM_DA*)da->data;
  PetscInt               ifill_col,*dfill = dd->dfill,*ofill = dd->ofill;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by,bz;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  PetscCall(DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st));
  col  = 2*s + 1;
  /* periodic wrap-around requires the grid size to be a multiple of the stencil span */
  PetscCheckFalse(bx == DM_BOUNDARY_PERIODIC && (m % col),PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
                 by 2*stencil_width + 1\n");
  PetscCheckFalse(by == DM_BOUNDARY_PERIODIC && (n % col),PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
                 by 2*stencil_width + 1\n");
  PetscCheckFalse(bz == DM_BOUNDARY_PERIODIC && (p % col),PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
                 by 2*stencil_width + 1\n");

  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
  if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;

  PetscCall(DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  /* cols holds the column indices of one row; col^3*nc is the largest possible stencil footprint */
  PetscCall(PetscMalloc1(col*col*col*nc,&cols));
  PetscCall(DMGetLocalToGlobalMapping(da,&ltog));

  /* determine the matrix preallocation information */
  /* MatPreallocateInitialize is a legacy macro that declares variables, hence the ierr = ... form */
  ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);PetscCall(ierr);

  PetscCall(MatSetBlockSize(J,nc));
  /* first pass: count the nonzeros in each local row (dnz = diagonal block, onz = off-diagonal) */
  for (i=xs; i<xs+nx; i++) {
    /* stencil extent in x: full +-s when periodic, otherwise clipped at the physical boundary */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
      for (k=zs; k<zs+nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

        /* slot = local (ghosted) index of grid point (i,j,k) */
        slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

        for (l=0; l<nc; l++) {
          cnt = 0;
          for (ii=istart; ii<iend+1; ii++) {
            for (jj=jstart; jj<jend+1; jj++) {
              for (kk=kstart; kk<kend+1; kk++) {
                if (ii || jj || kk) {
                  /* off-point coupling: use ofill; star stencils only couple along the axes */
                  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                    for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                  }
                } else {
                  /* same-point coupling: use dfill if given, else couple all nc components */
                  if (dfill) {
                    for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                  } else {
                    for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                  }
                }
              }
            }
          }
          row  = l + nc*(slot);
          /* track the widest row so the zero-value buffer below can be sized once */
          maxcnt = PetscMax(maxcnt,cnt);
          if (removedups) {
            PetscCall(MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz));
          } else {
            PetscCall(MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz));
          }
        }
      }
    }
  }
  PetscCall(MatSeqAIJSetPreallocation(J,0,dnz));
  PetscCall(MatMPIAIJSetPreallocation(J,0,dnz,0,onz));
  ierr = MatPreallocateFinalize(dnz,onz);PetscCall(ierr);
  PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* second pass: insert explicit zeros (values is zero-filled) into every counted location;
       the loop structure must mirror the counting pass above exactly */
    PetscCall(PetscCalloc1(maxcnt,&values));
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
        for (k=zs; k<zs+nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

          slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

          for (l=0; l<nc; l++) {
            cnt = 0;
            for (ii=istart; ii<iend+1; ii++) {
              for (jj=jstart; jj<jend+1; jj++) {
                for (kk=kstart; kk<kend+1; kk++) {
                  if (ii || jj || kk) {
                    if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                      for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                    }
                  } else {
                    if (dfill) {
                      for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                    } else {
                      for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                    }
                  }
                }
              }
            }
            row  = l + nc*(slot);
            PetscCall(MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES));
          }
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J,PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J,PETSC_FALSE));
    /* any later attempt to insert outside the preallocated pattern is an error */
    PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(0);
}
2378