xref: /petsc/src/dm/impls/da/fdda.c (revision b94d7ded0a05f1bbd5e48daa6f92b28259c75b44)
1 
2 #include <petsc/private/dmdaimpl.h> /*I      "petscdmda.h"     I*/
3 #include <petscmat.h>
4 #include <petscbt.h>
5 
6 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring*);
7 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring*);
8 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring*);
9 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring*);
10 
/*
   For a ghost index i that may be negative or greater than the upper bound m-1,
   map it into the 0:m-1 range using periodicity.

   Note: both arguments are evaluated more than once, so pass only
   side-effect-free expressions (all current callers pass plain variables).
*/
#define SetInRange(i,m) (((i) < 0) ? (m)+(i) : (((i) >= (m)) ? (i)-(m) : (i)))
16 
17 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill,PetscInt w,PetscInt **rfill)
18 {
19   PetscInt       i,j,nz,*fill;
20 
21   PetscFunctionBegin;
22   if (!dfill) PetscFunctionReturn(0);
23 
24   /* count number nonzeros */
25   nz = 0;
26   for (i=0; i<w; i++) {
27     for (j=0; j<w; j++) {
28       if (dfill[w*i+j]) nz++;
29     }
30   }
31   PetscCall(PetscMalloc1(nz + w + 1,&fill));
32   /* construct modified CSR storage of nonzero structure */
33   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
34    so fill[1] - fill[0] gives number of nonzeros in first row etc */
35   nz = w + 1;
36   for (i=0; i<w; i++) {
37     fill[i] = nz;
38     for (j=0; j<w; j++) {
39       if (dfill[w*i+j]) {
40         fill[nz] = j;
41         nz++;
42       }
43     }
44   }
45   fill[w] = nz;
46 
47   *rfill = fill;
48   PetscFunctionReturn(0);
49 }
50 
51 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse,PetscInt w,PetscInt **rfill)
52 {
53   PetscInt       nz;
54 
55   PetscFunctionBegin;
56   if (!dfillsparse) PetscFunctionReturn(0);
57 
58   /* Determine number of non-zeros */
59   nz = (dfillsparse[w] - w - 1);
60 
61   /* Allocate space for our copy of the given sparse matrix representation. */
62   PetscCall(PetscMalloc1(nz + w + 1,rfill));
63   PetscCall(PetscArraycpy(*rfill,dfillsparse,nz+w+1));
64   PetscFunctionReturn(0);
65 }
66 
67 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
68 {
69   PetscInt       i,k,cnt = 1;
70 
71   PetscFunctionBegin;
72 
73   /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of
74    columns to the left with any nonzeros in them plus 1 */
75   PetscCall(PetscCalloc1(dd->w,&dd->ofillcols));
76   for (i=0; i<dd->w; i++) {
77     for (k=dd->ofill[i]; k<dd->ofill[i+1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
78   }
79   for (i=0; i<dd->w; i++) {
80     if (dd->ofillcols[i]) {
81       dd->ofillcols[i] = cnt++;
82     }
83   }
84   PetscFunctionReturn(0);
85 }
86 
87 /*@
88     DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
89     of the matrix returned by DMCreateMatrix().
90 
91     Logically Collective on da
92 
93     Input Parameters:
94 +   da - the distributed array
95 .   dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
96 -   ofill - the fill pattern in the off-diagonal blocks
97 
98     Level: developer
99 
100     Notes:
101     This only makes sense when you are doing multicomponent problems but using the
102        MPIAIJ matrix format
103 
104            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
105        representing coupling and 0 entries for missing coupling. For example
106 $             dfill[9] = {1, 0, 0,
107 $                         1, 1, 0,
108 $                         0, 1, 1}
109        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
110        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
111        diagonal block).
112 
     DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns than
     can be represented in the dfill, ofill format
115 
116    Contributed by Glenn Hammond
117 
118 .seealso `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
119 
120 @*/
PetscErrorCode  DMDASetBlockFills(DM da,const PetscInt *dfill,const PetscInt *ofill)
{
  DM_DA          *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /* save the given dfill and ofill information, converted to the modified CSR
     form produced by DMDASetBlockFills_Private() */
  /* NOTE(review): a second call would overwrite dd->dfill/dd->ofill without
     freeing the earlier arrays -- confirm callers set the fills only once */
  PetscCall(DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill));
  PetscCall(DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill));

  /* count nonzeros in ofill columns (used by the 1d AIJ coloring/preallocation) */
  PetscCall(DMDASetBlockFills_Private2(dd));
  PetscFunctionReturn(0);
}
134 
135 /*@
136     DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
137     of the matrix returned by DMCreateMatrix(), using sparse representations
138     of fill patterns.
139 
140     Logically Collective on da
141 
142     Input Parameters:
143 +   da - the distributed array
144 .   dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block)
145 -   ofill - the sparse fill pattern in the off-diagonal blocks
146 
147     Level: developer
148 
149     Notes: This only makes sense when you are doing multicomponent problems but using the
150        MPIAIJ matrix format
151 
152            The format for dfill and ofill is a sparse representation of a
153            dof-by-dof matrix with 1 entries representing coupling and 0 entries
154            for missing coupling.  The sparse representation is a 1 dimensional
155            array of length nz + dof + 1, where nz is the number of non-zeros in
156            the matrix.  The first dof entries in the array give the
157            starting array indices of each row's items in the rest of the array,
158            the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
159            and the remaining nz items give the column indices of each of
160            the 1s within the logical 2D matrix.  Each row's items within
161            the array are the column indices of the 1s within that row
162            of the 2D matrix.  PETSc developers may recognize that this is the
163            same format as that computed by the DMDASetBlockFills_Private()
164            function from a dense 2D matrix representation.
165 
     DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns than
     can be represented in the dfill, ofill format
168 
169    Contributed by Philip C. Roth
170 
171 .seealso `DMDASetBlockFills()`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
172 
173 @*/
PetscErrorCode  DMDASetBlockFillsSparse(DM da,const PetscInt *dfillsparse,const PetscInt *ofillsparse)
{
  DM_DA          *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /* save copies of the given sparse (modified CSR) dfill and ofill information */
  /* NOTE(review): as with DMDASetBlockFills(), a repeated call would overwrite
     dd->dfill/dd->ofill without freeing the earlier arrays -- confirm callers
     set the fills only once */
  PetscCall(DMDASetBlockFillsSparse_Private(dfillsparse,dd->w,&dd->dfill));
  PetscCall(DMDASetBlockFillsSparse_Private(ofillsparse,dd->w,&dd->ofill));

  /* count nonzeros in ofill columns (used by the 1d AIJ coloring/preallocation) */
  PetscCall(DMDASetBlockFills_Private2(dd));
  PetscFunctionReturn(0);
}
187 
/*
  DMCreateColoring_DA - Creates a coloring of the DMDA's Jacobian nonzero
  pattern, dispatching to the dimension-specific routines below.

  For BAIJ matrix types the DMDA is temporarily reconfigured to one degree of
  freedom per grid point so the coloring is per block rather than per matrix
  element; the original settings are restored before returning.
*/
PetscErrorCode  DMCreateColoring_DA(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscInt         dim,m,n,p,nc;
  DMBoundaryType   bx,by,bz;
  MPI_Comm         comm;
  PetscMPIInt      size;
  PetscBool        isBAIJ;
  DM_DA            *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
                                  m
          ------------------------------------------------------
         |                                                     |
         |                                                     |
         |               ----------------------                |
         |               |                    |                |
      n  |           yn  |                    |                |
         |               |                    |                |
         |               .---------------------                |
         |             (xs,ys)     xn                          |
         |            .                                        |
         |         (gxs,gys)                                   |
         |                                                     |
          -----------------------------------------------------
  */

  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  /* note: m,n,p here receive the number of processes in each direction
     (positions 6-8 of DMDAGetInfo), not the global grid sizes */
  PetscCall(DMDAGetInfo(da,&dim,NULL,NULL,NULL,&m,&n,&p,&nc,NULL,&bx,&by,&bz,NULL));

  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  if (ctype == IS_COLORING_LOCAL) {
    if (size == 1) {
      /* on a single process a ghosted coloring degenerates to a global one */
      ctype = IS_COLORING_GLOBAL;
    } else if (dim > 1) {
      /* a periodic direction owned entirely by one process wraps onto itself,
         which the local coloring cannot represent */
      if ((m==1 && bx == DM_BOUNDARY_PERIODIC) || (n==1 && by == DM_BOUNDARY_PERIODIC) || (p==1 && bz == DM_BOUNDARY_PERIODIC)) {
        SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain  on the same process");
      }
    }
  }

  /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
     matrices is for the blocks, not the individual matrix elements  */
  PetscCall(PetscStrbeginswith(da->mattype,MATBAIJ,&isBAIJ));
  if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype,MATMPIBAIJ,&isBAIJ));
  if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype,MATSEQBAIJ,&isBAIJ));
  if (isBAIJ) {
    /* temporarily scale the component count and x-direction corners down to blocks */
    dd->w  = 1;
    dd->xs = dd->xs/nc;
    dd->xe = dd->xe/nc;
    dd->Xs = dd->Xs/nc;
    dd->Xe = dd->Xe/nc;
  }

  /*
     We do not provide a getcoloring function in the DMDA operations because
   the basic DMDA does not know about matrices. We think of DMDA as being
   more low-level than matrices.
  */
  if (dim == 1) PetscCall(DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring));
  else if (dim == 2) PetscCall(DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring));
  else if (dim == 3) PetscCall(DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring));
  else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not done for %" PetscInt_FMT " dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
  if (isBAIJ) {
    /* restore the true number of components and the scaled corner indices */
    dd->w  = nc;
    dd->xs = dd->xs*nc;
    dd->xe = dd->xe*nc;
    dd->Xs = dd->Xs*nc;
    dd->Xe = dd->Xe*nc;
  }
  PetscFunctionReturn(0);
}
265 
266 /* ---------------------------------------------------------------------------------*/
267 
/*
  DMCreateColoring_DA_2d_MPIAIJ - Builds an ISColoring for a 2d DMDA by giving
  component k of grid point (i,j) the color k + nc*((i % col) + col*(j % col)),
  where col = 2*s+1 colors are needed per direction for stencil width s.
  Colorings are computed once and cached on the DM_DA.
*/
PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscInt        xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
  PetscInt        ncolors;
  MPI_Comm        comm;
  DMBoundaryType  bx,by;
  DMDAStencilType st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  /* here m,n are the global grid sizes; M,N the number of processes per direction */
  PetscCall(DMDAGetInfo(da,&dim,&m,&n,NULL,&M,&N,NULL,&nc,&s,&bx,&by,NULL,&st));
  col  = 2*s + 1;
  PetscCall(DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  /* special case as taught to us by Paul Hovland */
  if (st == DMDA_STENCIL_STAR && s == 1) {
    /* a 5-point star stencil needs only 5 colors per component */
    PetscCall(DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring));
  } else {
    if (ctype == IS_COLORING_GLOBAL) {
      if (!dd->localcoloring) { /* compute once, cache on the DMDA */
        PetscCall(PetscMalloc1(nc*nx*ny,&colors));
        ii   = 0;
        for (j=ys; j<ys+ny; j++) {
          for (i=xs; i<xs+nx; i++) {
            for (k=0; k<nc; k++) {
              colors[ii++] = k + nc*((i % col) + col*(j % col));
            }
          }
        }
        /* largest color value + 1: nc per point times col*col stencil cells */
        ncolors = nc + nc*(col-1 + col*(col-1));
        PetscCall(ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring));
      }
      *coloring = dd->localcoloring;
    } else if (ctype == IS_COLORING_LOCAL) {
      if (!dd->ghostedcoloring) {
        PetscCall(PetscMalloc1(nc*gnx*gny,&colors));
        ii   = 0;
        for (j=gys; j<gys+gny; j++) {
          for (i=gxs; i<gxs+gnx; i++) {
            for (k=0; k<nc; k++) {
              /* the complicated stuff is to handle periodic boundaries: ghost
                 indices outside 0..m-1 / 0..n-1 are wrapped back into range */
              colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
            }
          }
        }
        ncolors = nc + nc*(col - 1 + col*(col-1));
        PetscCall(ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring));
        /* PetscIntView(ncolors,(PetscInt*)colors,0); */

        PetscCall(ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL));
      }
      *coloring = dd->ghostedcoloring;
    } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
  }
  /* the caller receives its own reference to the cached coloring */
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(0);
}
333 
334 /* ---------------------------------------------------------------------------------*/
335 
/*
  DMCreateColoring_DA_3d_MPIAIJ - 3d analogue of the 2d routine above: component
  l of grid point (i,j,k) gets color l + nc*((i%col) + col*(j%col) + col*col*(k%col)),
  with col = 2*s+1 for stencil width s.  Colorings are cached on the DM_DA.
*/
PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscInt        xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
  PetscInt        ncolors;
  MPI_Comm        comm;
  DMBoundaryType  bx,by,bz;
  DMDAStencilType st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  /* here m,n,p are the global grid sizes; M,N,P the number of processes per direction */
  PetscCall(DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st));
  col  = 2*s + 1;
  PetscCall(DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) { /* compute once, cache on the DMDA */
      PetscCall(PetscMalloc1(nc*nx*ny*nz,&colors));
      ii   = 0;
      for (k=zs; k<zs+nz; k++) {
        for (j=ys; j<ys+ny; j++) {
          for (i=xs; i<xs+nx; i++) {
            for (l=0; l<nc; l++) {
              colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
            }
          }
        }
      }
      /* largest color value + 1: nc per point times col^3 stencil cells */
      ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
      PetscCall(ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,PETSC_OWN_POINTER,&dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc*gnx*gny*gnz,&colors));
      ii   = 0;
      for (k=gzs; k<gzs+gnz; k++) {
        for (j=gys; j<gys+gny; j++) {
          for (i=gxs; i<gxs+gnx; i++) {
            for (l=0; l<nc; l++) {
              /* the complicated stuff is to handle periodic boundaries: ghost
                 indices outside the global ranges are wrapped back into range */
              colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
            }
          }
        }
      }
      ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
      PetscCall(ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
  /* the caller receives its own reference to the cached coloring */
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(0);
}
399 
400 /* ---------------------------------------------------------------------------------*/
401 
/*
  DMCreateColoring_DA_1d_MPIAIJ - Builds an ISColoring for a 1d DMDA; in
  general col = 2*s+1 colors per component.  When off-diagonal fill
  information is available (dd->ofillcols, set via DMDASetBlockFills()),
  components with no off-diagonal coupling can share colors, reducing the
  total count to nc + 2*s*tc.  Colorings are cached on the DM_DA.
*/
PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscInt        xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
  PetscInt        ncolors;
  MPI_Comm        comm;
  DMBoundaryType  bx;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  /* here m is the global grid size; M the number of processes */
  PetscCall(DMDAGetInfo(da,&dim,&m,NULL,NULL,&M,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL));
  col  = 2*s + 1;
  PetscCall(DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL));
  PetscCall(DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) { /* compute once, cache on the DMDA */
      PetscCall(PetscMalloc1(nc*nx,&colors));
      if (dd->ofillcols) {
        /* tc = number of components with any off-diagonal (neighbor) coupling */
        PetscInt tc = 0;
        for (i=0; i<nc; i++) tc += (PetscInt) (dd->ofillcols[i] > 0);
        i1 = 0;
        for (i=xs; i<xs+nx; i++) {
          for (l=0; l<nc; l++) {
            if (dd->ofillcols[l] && (i % col)) {
              /* coupled component away from the base cell: distinct color drawn
                 from the range above nc-1, indexed by cell phase and column rank */
              colors[i1++] =  nc - 1 + tc*((i % col) - 1) + dd->ofillcols[l];
            } else {
              /* uncoupled components (or base cell) reuse colors 0..nc-1 */
              colors[i1++] = l;
            }
          }
        }
        ncolors = nc + 2*s*tc;
      } else {
        i1 = 0;
        for (i=xs; i<xs+nx; i++) {
          for (l=0; l<nc; l++) {
            colors[i1++] = l + nc*(i % col);
          }
        }
        ncolors = nc + nc*(col-1);
      }
      PetscCall(ISColoringCreate(comm,ncolors,nc*nx,colors,PETSC_OWN_POINTER,&dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc*gnx,&colors));
      i1   = 0;
      for (i=gxs; i<gxs+gnx; i++) {
        for (l=0; l<nc; l++) {
          /* the complicated stuff is to handle periodic boundaries */
          colors[i1++] = l + nc*(SetInRange(i,m) % col);
        }
      }
      ncolors = nc + nc*(col-1);
      PetscCall(ISColoringCreate(comm,ncolors,nc*gnx,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
  /* the caller receives its own reference to the cached coloring */
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(0);
}
472 
/*
  DMCreateColoring_DA_2d_5pt_MPIAIJ - Special 2d coloring for the 5-point star
  stencil (width 1): the pattern (3*j+i) % 5 gives a valid coloring with only
  5 colors per component instead of 9.

  Note: unlike the other coloring routines in this file, no
  ISColoringReference() is taken here; the sole caller,
  DMCreateColoring_DA_2d_MPIAIJ(), adds the caller's reference itself.
*/
PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscInt        xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
  PetscInt        ncolors;
  MPI_Comm        comm;
  DMBoundaryType  bx,by;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  /* here m,n are the global grid sizes */
  PetscCall(DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,NULL));
  PetscCall(DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));
  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) { /* compute once, cache on the DMDA */
      PetscCall(PetscMalloc1(nc*nx*ny,&colors));
      ii   = 0;
      for (j=ys; j<ys+ny; j++) {
        for (i=xs; i<xs+nx; i++) {
          for (k=0; k<nc; k++) {
            colors[ii++] = k + nc*((3*j+i) % 5);
          }
        }
      }
      ncolors = 5*nc;
      PetscCall(ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc*gnx*gny,&colors));
      ii = 0;
      for (j=gys; j<gys+gny; j++) {
        for (i=gxs; i<gxs+gnx; i++) {
          for (k=0; k<nc; k++) {
            /* wrap ghost indices back into the global range for periodic boundaries */
            colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
          }
        }
      }
      ncolors = 5*nc;
      PetscCall(ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
  PetscFunctionReturn(0);
}
527 
528 /* =========================================================================== */
529 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat);
530 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM,Mat);
531 extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM,Mat);
532 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat);
533 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat);
534 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat);
535 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat);
536 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat);
537 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat);
538 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat);
539 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat);
540 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM,Mat);
541 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM,Mat);
542 extern PetscErrorCode DMCreateMatrix_DA_IS(DM,Mat);
543 
544 /*@C
545    MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix
546 
547    Logically Collective on mat
548 
549    Input Parameters:
550 +  mat - the matrix
551 -  da - the da
552 
553    Level: intermediate
554 
555 @*/
PetscErrorCode MatSetupDM(Mat mat,DM da)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  PetscValidHeaderSpecificType(da,DM_CLASSID,2,DMDA);
  /* dispatch to the matrix type's MatSetupDM_C implementation, if it provides
     one; PetscTryMethod() is a no-op when the method is not registered */
  PetscTryMethod(mat,"MatSetupDM_C",(Mat,DM),(mat,da));
  PetscFunctionReturn(0);
}
564 
/*
  MatView_MPI_DA - Viewer for parallel matrices created on a DMDA: permutes the
  matrix from the PETSc parallel ordering back to the DMDA's natural
  (application) ordering via the DMDA's AO, then views the permuted copy so the
  output matches the user's grid numbering.
*/
PetscErrorCode  MatView_MPI_DA(Mat A,PetscViewer viewer)
{
  DM                da;
  const char        *prefix;
  Mat               Anatural;
  AO                ao;
  PetscInt          rstart,rend,*petsc,i;
  IS                is;
  MPI_Comm          comm;
  PetscViewerFormat format;

  PetscFunctionBegin;
  /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
  PetscCall(PetscViewerGetFormat(viewer,&format));
  if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0);

  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCall(MatGetDM(A, &da));
  PetscCheck(da,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");

  /* build the index set of this process's rows translated into the PETSc ordering */
  PetscCall(DMDAGetAO(da,&ao));
  PetscCall(MatGetOwnershipRange(A,&rstart,&rend));
  PetscCall(PetscMalloc1(rend-rstart,&petsc));
  for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
  PetscCall(AOApplicationToPetsc(ao,rend-rstart,petsc));
  /* the IS takes ownership of petsc (PETSC_OWN_POINTER), so no explicit free */
  PetscCall(ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is));

  /* call viewer on natural ordering */
  PetscCall(MatCreateSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural));
  PetscCall(ISDestroy(&is));
  PetscCall(PetscObjectGetOptionsPrefix((PetscObject)A,&prefix));
  PetscCall(PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix));
  PetscCall(PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name));
  /* temporarily suppress the class/name header so output reads as if it came from A */
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
  PetscCall(MatView(Anatural,viewer));
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
  PetscCall(MatDestroy(&Anatural));
  PetscFunctionReturn(0);
}
604 
/*
  MatLoad_MPI_DA - Loader for parallel matrices created on a DMDA: loads the
  matrix as stored (natural ordering), then permutes it into the PETSc parallel
  ordering via the DMDA's AO and replaces A's contents in place with
  MatHeaderReplace() so callers keep their existing Mat handle.
*/
PetscErrorCode  MatLoad_MPI_DA(Mat A,PetscViewer viewer)
{
  DM             da;
  Mat            Anatural,Aapp;
  AO             ao;
  PetscInt       rstart,rend,*app,i,m,n,M,N;
  IS             is;
  MPI_Comm       comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCall(MatGetDM(A, &da));
  PetscCheck(da,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");

  /* Load the matrix in natural ordering */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&Anatural));
  PetscCall(MatSetType(Anatural,((PetscObject)A)->type_name));
  /* give Anatural A's layout so rows land on the same processes */
  PetscCall(MatGetSize(A,&M,&N));
  PetscCall(MatGetLocalSize(A,&m,&n));
  PetscCall(MatSetSizes(Anatural,m,n,M,N));
  PetscCall(MatLoad(Anatural,viewer));

  /* Map natural ordering to application ordering and create IS */
  PetscCall(DMDAGetAO(da,&ao));
  PetscCall(MatGetOwnershipRange(Anatural,&rstart,&rend));
  PetscCall(PetscMalloc1(rend-rstart,&app));
  for (i=rstart; i<rend; i++) app[i-rstart] = i;
  PetscCall(AOPetscToApplication(ao,rend-rstart,app));
  /* the IS takes ownership of app (PETSC_OWN_POINTER), so no explicit free */
  PetscCall(ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is));

  /* Do permutation and replace header: A's internals become those of Aapp */
  PetscCall(MatCreateSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp));
  PetscCall(MatHeaderReplace(A,&Aapp));
  PetscCall(ISDestroy(&is));
  PetscCall(MatDestroy(&Anatural));
  PetscFunctionReturn(0);
}
642 
/*
  DMCreateMatrix_DA - Creates and preallocates a matrix with the sparsity of
  the DMDA's stencil.  The preallocation routine is chosen by querying which
  Mat*SetPreallocation_C methods the created matrix type provides (AIJ, BAIJ,
  SBAIJ, SELL, IS), falling back to a plain MatSetUp() for other types.

  Note: the definition continues past this excerpt.
*/
PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
{
  PetscInt       dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
  Mat            A;
  MPI_Comm       comm;
  MatType        Atype;
  void           (*aij)(void)=NULL,(*baij)(void)=NULL,(*sbaij)(void)=NULL,(*sell)(void)=NULL,(*is)(void)=NULL;
  MatType        mtype;
  PetscMPIInt    size;
  DM_DA          *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  PetscCall(MatInitializePackage());
  mtype = da->mattype;

  /*
                                  m
          ------------------------------------------------------
         |                                                     |
         |                                                     |
         |               ----------------------                |
         |               |                    |                |
      n  |           ny  |                    |                |
         |               |                    |                |
         |               .---------------------                |
         |             (xs,ys)     nx                          |
         |            .                                        |
         |         (gxs,gys)                                   |
         |                                                     |
          -----------------------------------------------------
  */

  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  /* global sizes and dof are read directly from the DM_DA rather than DMDAGetInfo() */
  M   = dd->M;
  N   = dd->N;
  P   = dd->P;
  dim = da->dim;
  dof = dd->w;
  /* PetscCall(DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); */
  PetscCall(DMDAGetCorners(da,NULL,NULL,NULL,&nx,&ny,&nz));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));
  PetscCall(MatCreate(comm,&A));
  PetscCall(MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P));
  PetscCall(MatSetType(A,mtype));
  PetscCall(MatSetFromOptions(A));
  /* keep small matrices on the CPU when GPU binding thresholds apply */
  if (dof*nx*ny*nz < da->bind_below) {
    PetscCall(MatSetBindingPropagates(A,PETSC_TRUE));
    PetscCall(MatBindToCPU(A,PETSC_TRUE));
  }
  PetscCall(MatSetDM(A,da));
  if (da->structure_only) PetscCall(MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE));
  PetscCall(MatGetType(A,&Atype));
  /*
     We do not provide a getmatrix function in the DMDA operations because
   the basic DMDA does not know about matrices. We think of DMDA as being
   more low-level than matrices.

     We could switch based on Atype (or mtype), but we do not since the
   specialized setting routines depend only on the particular preallocation
   details of the matrix, not the type itself.
  */
  PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij));
  if (!aij) {
    PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij));
  }
  if (!aij) {
    /* not AIJ: probe BAIJ, then SBAIJ, then SELL, then IS, in that order */
    PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij));
    if (!baij) {
      PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij));
    }
    if (!baij) {
      PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij));
      if (!sbaij) {
        PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij));
      }
      if (!sbaij) {
        PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPISELLSetPreallocation_C",&sell));
        if (!sell) {
          PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",&sell));
        }
      }
      if (!sell) {
        PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatISSetPreallocation_C",&is));
      }
    }
  }
  if (aij) {
    if (dim == 1) {
      if (dd->ofill) {
        /* block fill pattern was set with DMDASetBlockFills(): use the sparse version */
        PetscCall(DMCreateMatrix_DA_1d_MPIAIJ_Fill(da,A));
      } else {
        DMBoundaryType bx;
        PetscMPIInt  size; /* NOTE(review): shadows the function-scope 'size' -- harmless but confusing */
        PetscCall(DMDAGetInfo(da,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&bx,NULL,NULL,NULL));
        PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da),&size));
        if (size == 1 && bx == DM_BOUNDARY_NONE) {
          PetscCall(DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da,A));
        } else {
          PetscCall(DMCreateMatrix_DA_1d_MPIAIJ(da,A));
        }
      }
    } else if (dim == 2) {
      if (dd->ofill) {
        PetscCall(DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A));
      } else {
        PetscCall(DMCreateMatrix_DA_2d_MPIAIJ(da,A));
      }
    } else if (dim == 3) {
      if (dd->ofill) {
        PetscCall(DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A));
      } else {
        PetscCall(DMCreateMatrix_DA_3d_MPIAIJ(da,A));
      }
    }
  } else if (baij) {
    if (dim == 2) {
      PetscCall(DMCreateMatrix_DA_2d_MPIBAIJ(da,A));
    } else if (dim == 3) {
      PetscCall(DMCreateMatrix_DA_3d_MPIBAIJ(da,A));
    } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
  } else if (sbaij) {
    if (dim == 2) {
      PetscCall(DMCreateMatrix_DA_2d_MPISBAIJ(da,A));
    } else if (dim == 3) {
      PetscCall(DMCreateMatrix_DA_3d_MPISBAIJ(da,A));
    } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
  } else if (sell) {
     if (dim == 2) {
       PetscCall(DMCreateMatrix_DA_2d_MPISELL(da,A));
     } else if (dim == 3) {
       PetscCall(DMCreateMatrix_DA_3d_MPISELL(da,A));
     } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
  } else if (is) {
    PetscCall(DMCreateMatrix_DA_IS(da,A));
  } else {
    /* unknown type: no stencil-based preallocation; just set up with the l2g map */
    ISLocalToGlobalMapping ltog;

    PetscCall(MatSetBlockSize(A,dof));
    PetscCall(MatSetUp(A));
    PetscCall(DMGetLocalToGlobalMapping(da,&ltog));
    PetscCall(MatSetLocalToGlobalMapping(A,ltog,ltog));
  }
  /* record the ghost region so MatSetValuesStencil() can translate stencil indices */
  PetscCall(DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]));
  PetscCall(MatSetStencil(A,dim,dims,starts,dof));
  PetscCall(MatSetDM(A,da)); /* NOTE(review): MatSetDM() was already called above -- appears redundant; confirm */
  PetscCallMPI(MPI_Comm_size(comm,&size));
  if (size > 1) {
    /* change viewer to display matrix in natural ordering */
    PetscCall(MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA));
    PetscCall(MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA));
  }
  *J = A;
  PetscFunctionReturn(0);
801 }
802 
803 /* ---------------------------------------------------------------------------------*/
804 PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
805 
806 PetscErrorCode DMCreateMatrix_DA_IS(DM dm,Mat J)
807 {
808   DM_DA                  *da = (DM_DA*)dm->data;
809   Mat                    lJ,P;
810   ISLocalToGlobalMapping ltog;
811   IS                     is;
812   PetscBT                bt;
813   const PetscInt         *e_loc,*idx;
814   PetscInt               i,nel,nen,nv,dof,*gidx,n,N;
815 
816   /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
817      We need to filter out the local indices that are not represented through the DMDAGetElements decomposition */
818   PetscFunctionBegin;
819   dof  = da->w;
820   PetscCall(MatSetBlockSize(J,dof));
821   PetscCall(DMGetLocalToGlobalMapping(dm,&ltog));
822 
823   /* flag local elements indices in local DMDA numbering */
824   PetscCall(ISLocalToGlobalMappingGetSize(ltog,&nv));
825   PetscCall(PetscBTCreate(nv/dof,&bt));
826   PetscCall(DMDAGetElements(dm,&nel,&nen,&e_loc)); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
827   for (i=0;i<nel*nen;i++) PetscCall(PetscBTSet(bt,e_loc[i]));
828 
829   /* filter out (set to -1) the global indices not used by the local elements */
830   PetscCall(PetscMalloc1(nv/dof,&gidx));
831   PetscCall(ISLocalToGlobalMappingGetBlockIndices(ltog,&idx));
832   PetscCall(PetscArraycpy(gidx,idx,nv/dof));
833   PetscCall(ISLocalToGlobalMappingRestoreBlockIndices(ltog,&idx));
834   for (i=0;i<nv/dof;i++) if (!PetscBTLookup(bt,i)) gidx[i] = -1;
835   PetscCall(PetscBTDestroy(&bt));
836   PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nv/dof,gidx,PETSC_OWN_POINTER,&is));
837   PetscCall(ISLocalToGlobalMappingCreateIS(is,&ltog));
838   PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));
839   PetscCall(ISLocalToGlobalMappingDestroy(&ltog));
840   PetscCall(ISDestroy(&is));
841 
842   /* Preallocation */
843   if (dm->prealloc_skip) {
844     PetscCall(MatSetUp(J));
845   } else {
846     PetscCall(MatISGetLocalMat(J,&lJ));
847     PetscCall(MatGetLocalToGlobalMapping(lJ,&ltog,NULL));
848     PetscCall(MatCreate(PetscObjectComm((PetscObject)lJ),&P));
849     PetscCall(MatSetType(P,MATPREALLOCATOR));
850     PetscCall(MatSetLocalToGlobalMapping(P,ltog,ltog));
851     PetscCall(MatGetSize(lJ,&N,NULL));
852     PetscCall(MatGetLocalSize(lJ,&n,NULL));
853     PetscCall(MatSetSizes(P,n,n,N,N));
854     PetscCall(MatSetBlockSize(P,dof));
855     PetscCall(MatSetUp(P));
856     for (i=0;i<nel;i++) {
857       PetscCall(MatSetValuesBlockedLocal(P,nen,e_loc+i*nen,nen,e_loc+i*nen,NULL,INSERT_VALUES));
858     }
859     PetscCall(MatPreallocatorPreallocate(P,(PetscBool)!da->prealloc_only,lJ));
860     PetscCall(MatISRestoreLocalMat(J,&lJ));
861     PetscCall(DMDARestoreElements(dm,&nel,&nen,&e_loc));
862     PetscCall(MatDestroy(&P));
863 
864     PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
865     PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
866   }
867   PetscFunctionReturn(0);
868 }
869 
/*
   DMCreateMatrix_DA_2d_MPISELL - Preallocates (and optionally zero-fills) a
   SELL-format matrix for a 2d DMDA.

   Two passes over the owned grid points: the first counts the stencil
   couplings per row via the MatPreallocate macros, the second (skipped when
   da->prealloc_only) inserts explicit zeros so the nonzero pattern is fixed.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da,Mat J)
{
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p;
  PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  PetscCall(DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st));
  col  = 2*s + 1;
  PetscCall(DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  /* rows holds the nc row indices of one grid point; cols holds up to nc*col*col column indices */
  PetscCall(PetscMalloc2(nc,&rows,col*col*nc*nc,&cols));
  PetscCall(DMGetLocalToGlobalMapping(da,&ltog));

  PetscCall(MatSetBlockSize(J,nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm,nc*nx*ny,nc*nx*ny,dnz,onz);
  for (i=xs; i<xs+nx; i++) {

    /* clip the stencil at the physical boundary unless periodic in x */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

    for (j=ys; j<ys+ny; j++) {
      slot = i - gxs + gnx*(j - gys); /* local (ghosted) index of grid point (i,j) */

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

      cnt = 0;
      for (k=0; k<nc; k++) {
        for (l=lstart; l<lend+1; l++) {
          for (p=pstart; p<pend+1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc*(slot + gnx*l + p);
            }
          }
        }
        rows[k] = k + nc*(slot);
      }
      PetscCall(MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz));
    }
  }
  PetscCall(MatSetBlockSize(J,nc));
  /* only the call matching the actual type of J takes effect */
  PetscCall(MatSeqSELLSetPreallocation(J,0,dnz));
  PetscCall(MatMPISELLSetPreallocation(J,0,dnz,0,onz));
  MatPreallocateEnd(dnz,onz);

  PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscCalloc1(col*col*nc*nc,&values));
    for (i=xs; i<xs+nx; i++) {

      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

      for (j=ys; j<ys+ny; j++) {
        slot = i - gxs + gnx*(j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

        cnt = 0;
        for (k=0; k<nc; k++) {
          for (l=lstart; l<lend+1; l++) {
            for (p=pstart; p<pend+1; p++) {
              if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
                cols[cnt++] = k + nc*(slot + gnx*l + p);
              }
            }
          }
          rows[k] = k + nc*(slot);
        }
        PetscCall(MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J,PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J,PETSC_FALSE));
    PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows,cols));
  PetscFunctionReturn(0);
}
973 
974 PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da,Mat J)
975 {
976   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
977   PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
978   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
979   MPI_Comm               comm;
980   PetscScalar            *values;
981   DMBoundaryType         bx,by,bz;
982   ISLocalToGlobalMapping ltog;
983   DMDAStencilType        st;
984 
985   PetscFunctionBegin;
986   /*
987          nc - number of components per grid point
988          col - number of colors needed in one direction for single component problem
989 
990   */
991   PetscCall(DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st));
992   col  = 2*s + 1;
993   PetscCall(DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz));
994   PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz));
995   PetscCall(PetscObjectGetComm((PetscObject)da,&comm));
996 
997   PetscCall(PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols));
998   PetscCall(DMGetLocalToGlobalMapping(da,&ltog));
999 
1000   PetscCall(MatSetBlockSize(J,nc));
1001   /* determine the matrix preallocation information */
1002   MatPreallocateBegin(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);
1003   for (i=xs; i<xs+nx; i++) {
1004     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1005     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1006     for (j=ys; j<ys+ny; j++) {
1007       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1008       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1009       for (k=zs; k<zs+nz; k++) {
1010         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1011         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1012 
1013         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1014 
1015         cnt = 0;
1016         for (l=0; l<nc; l++) {
1017           for (ii=istart; ii<iend+1; ii++) {
1018             for (jj=jstart; jj<jend+1; jj++) {
1019               for (kk=kstart; kk<kend+1; kk++) {
1020                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1021                   cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1022                 }
1023               }
1024             }
1025           }
1026           rows[l] = l + nc*(slot);
1027         }
1028         PetscCall(MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz));
1029       }
1030     }
1031   }
1032   PetscCall(MatSetBlockSize(J,nc));
1033   PetscCall(MatSeqSELLSetPreallocation(J,0,dnz));
1034   PetscCall(MatMPISELLSetPreallocation(J,0,dnz,0,onz));
1035   MatPreallocateEnd(dnz,onz);
1036   PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));
1037 
1038   /*
1039     For each node in the grid: we get the neighbors in the local (on processor ordering
1040     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1041     PETSc ordering.
1042   */
1043   if (!da->prealloc_only) {
1044     PetscCall(PetscCalloc1(col*col*col*nc*nc*nc,&values));
1045     for (i=xs; i<xs+nx; i++) {
1046       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1047       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1048       for (j=ys; j<ys+ny; j++) {
1049         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1050         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1051         for (k=zs; k<zs+nz; k++) {
1052           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1053           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1054 
1055           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1056 
1057           cnt = 0;
1058           for (l=0; l<nc; l++) {
1059             for (ii=istart; ii<iend+1; ii++) {
1060               for (jj=jstart; jj<jend+1; jj++) {
1061                 for (kk=kstart; kk<kend+1; kk++) {
1062                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1063                     cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1064                   }
1065                 }
1066               }
1067             }
1068             rows[l] = l + nc*(slot);
1069           }
1070           PetscCall(MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES));
1071         }
1072       }
1073     }
1074     PetscCall(PetscFree(values));
1075     /* do not copy values to GPU since they are all zero and not yet needed there */
1076     PetscCall(MatBindToCPU(J,PETSC_TRUE));
1077     PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
1078     PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
1079     PetscCall(MatBindToCPU(J,PETSC_FALSE));
1080     PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
1081   }
1082   PetscCall(PetscFree2(rows,cols));
1083   PetscFunctionReturn(0);
1084 }
1085 
/*
   DMCreateMatrix_DA_2d_MPIAIJ - Preallocates (and optionally zero-fills) an
   AIJ-format matrix for a 2d DMDA with all components fully coupled.

   First pass counts stencil couplings per row with the MatPreallocate macros;
   second pass (skipped when da->prealloc_only) inserts the nonzero pattern.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J)
{
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,M,N;
  PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
  MPI_Comm               comm;
  DMBoundaryType         bx,by;
  ISLocalToGlobalMapping ltog,mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE,alreadyboundtocpu = PETSC_TRUE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  PetscCall(DMDAGetInfo(da,&dim,&m,&n,NULL,&M,&N,NULL,&nc,&s,&bx,&by,NULL,&st));
  /* non-periodic insertion below produces fully sorted rows, which AIJ can exploit */
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE));
  col  = 2*s + 1;
  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
  PetscCall(DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  /* rows holds the nc row indices of one grid point; cols holds up to nc*col*col column indices */
  PetscCall(PetscMalloc2(nc,&rows,col*col*nc*nc,&cols));
  PetscCall(DMGetLocalToGlobalMapping(da,&ltog));

  PetscCall(MatSetBlockSize(J,nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm,nc*nx*ny,nc*nx*ny,dnz,onz);
  for (i=xs; i<xs+nx; i++) {
    /* clip the stencil at the physical boundary unless periodic in x */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

    for (j=ys; j<ys+ny; j++) {
      slot = i - gxs + gnx*(j - gys); /* local (ghosted) index of grid point (i,j) */

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

      cnt = 0;
      for (k=0; k<nc; k++) {
        for (l=lstart; l<lend+1; l++) {
          for (p=pstart; p<pend+1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc*(slot + gnx*l + p);
            }
          }
        }
        rows[k] = k + nc*(slot);
      }
      if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz));
      else PetscCall(MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz));
    }
  }
  PetscCall(MatSetBlockSize(J,nc));
  /* only the call matching the actual type of J takes effect */
  PetscCall(MatSeqAIJSetPreallocation(J,0,dnz));
  PetscCall(MatMPIAIJSetPreallocation(J,0,dnz,0,onz));
  MatPreallocateEnd(dnz,onz);
  /* keep a mapping already attached to J (e.g. by a caller); otherwise install the DMDA's */
  PetscCall(MatGetLocalToGlobalMapping(J,&mltog,NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i=xs; i<xs+nx; i++) {

      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

      for (j=ys; j<ys+ny; j++) {
        slot = i - gxs + gnx*(j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

        cnt = 0;
        for (l=lstart; l<lend+1; l++) {
          for (p=pstart; p<pend+1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
              /* first component at this stencil point; the other nc-1 are consecutive */
              cols[cnt++] = nc*(slot + gnx*l + p);
              for (k=1; k<nc; k++) {
                cols[cnt] = 1 + cols[cnt-1];cnt++;
              }
            }
          }
        }
        for (k=0; k<nc; k++) rows[k] = k + nc*(slot);
        PetscCall(MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES));
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBoundToCPU(J,&alreadyboundtocpu));
    PetscCall(MatBindToCPU(J,PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    if (!alreadyboundtocpu) PetscCall(MatBindToCPU(J,PETSC_FALSE));
    PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE));
  }
  PetscCall(PetscFree2(rows,cols));
  PetscFunctionReturn(0);
}
1197 
/*
   DMCreateMatrix_DA_2d_MPIAIJ_Fill - Preallocates (and optionally zero-fills)
   an AIJ-format matrix for a 2d DMDA using the user-provided component
   coupling patterns dd->dfill (diagonal block) and dd->ofill (off-diagonal
   blocks), both stored in the modified-CSR layout built by
   DMDASetBlockFills_Private().
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)
{
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,maxcnt = 0,l,p,M,N;
  PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
  DM_DA                  *dd = (DM_DA*)da->data;
  PetscInt               ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
  MPI_Comm               comm;
  DMBoundaryType         bx,by;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  PetscCall(DMDAGetInfo(da,&dim,&m,&n,NULL,&M,&N,NULL,&nc,&s,&bx,&by,NULL,&st));
  col  = 2*s + 1;
  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
  PetscCall(DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  /* one row at a time: at most one column per component per stencil point */
  PetscCall(PetscMalloc1(col*col*nc,&cols));
  PetscCall(DMGetLocalToGlobalMapping(da,&ltog));

  PetscCall(MatSetBlockSize(J,nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm,nc*nx*ny,nc*nx*ny,dnz,onz);
  for (i=xs; i<xs+nx; i++) {

    /* clip the stencil at the physical boundary unless periodic in x */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

    for (j=ys; j<ys+ny; j++) {
      slot = i - gxs + gnx*(j - gys); /* local (ghosted) index of grid point (i,j) */

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

      for (k=0; k<nc; k++) {
        cnt = 0;
        for (l=lstart; l<lend+1; l++) {
          for (p=pstart; p<pend+1; p++) {
            if (l || p) {
              /* neighbor grid point: couple only the components listed in ofill row k */
              if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
                for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
              }
            } else {
              /* center point: dfill row k if given, else couple all nc components */
              if (dfill) {
                for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
              } else {
                for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
              }
            }
          }
        }
        row    = k + nc*(slot);
        /* NOTE(review): maxcnt is tracked here but never read afterwards in this function */
        maxcnt = PetscMax(maxcnt,cnt);
        if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz));
        else PetscCall(MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz));
      }
    }
  }
  /* only the call matching the actual type of J takes effect */
  PetscCall(MatSeqAIJSetPreallocation(J,0,dnz));
  PetscCall(MatMPIAIJSetPreallocation(J,0,dnz,0,onz));
  MatPreallocateEnd(dnz,onz);
  PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i=xs; i<xs+nx; i++) {

      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

      for (j=ys; j<ys+ny; j++) {
        slot = i - gxs + gnx*(j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

        for (k=0; k<nc; k++) {
          cnt = 0;
          for (l=lstart; l<lend+1; l++) {
            for (p=pstart; p<pend+1; p++) {
              if (l || p) {
                if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
                  for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
                }
              } else {
                if (dfill) {
                  for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
                } else {
                  for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
                }
              }
            }
          }
          row  = k + nc*(slot);
          PetscCall(MatSetValuesLocal(J,1,&row,cnt,cols,NULL,INSERT_VALUES));
        }
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J,PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J,PETSC_FALSE));
    PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(0);
}
1324 
1325 /* ---------------------------------------------------------------------------------*/
1326 
/*
   DMCreateMatrix_DA_3d_MPIAIJ - Preallocates (and optionally zero-fills) an
   AIJ-format matrix for a 3d DMDA with all components fully coupled.

   First pass counts stencil couplings per row with the MatPreallocate macros;
   second pass (skipped when da->prealloc_only) inserts the nonzero pattern.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J)
{
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
  PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
  MPI_Comm               comm;
  DMBoundaryType         bx,by,bz;
  ISLocalToGlobalMapping ltog,mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  PetscCall(DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st));
  /* non-periodic insertion below produces fully sorted rows, which AIJ can exploit */
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) {
    PetscCall(MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE));
  }
  col  = 2*s + 1;

  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
  if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;

  PetscCall(DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  /* rows holds the nc row indices of one grid point; cols holds up to nc*col^3 column indices */
  PetscCall(PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols));
  PetscCall(DMGetLocalToGlobalMapping(da,&ltog));

  PetscCall(MatSetBlockSize(J,nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);
  for (i=xs; i<xs+nx; i++) {
    /* clip the stencil at the physical boundary unless periodic in that direction */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
      for (k=zs; k<zs+nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

        slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); /* local (ghosted) index of grid point (i,j,k) */

        cnt = 0;
        for (l=0; l<nc; l++) {
          for (ii=istart; ii<iend+1; ii++) {
            for (jj=jstart; jj<jend+1; jj++) {
              for (kk=kstart; kk<kend+1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                  cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                }
              }
            }
          }
          rows[l] = l + nc*(slot);
        }
        if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz));
        else PetscCall(MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz));
      }
    }
  }
  PetscCall(MatSetBlockSize(J,nc));
  /* only the call matching the actual type of J takes effect */
  PetscCall(MatSeqAIJSetPreallocation(J,0,dnz));
  PetscCall(MatMPIAIJSetPreallocation(J,0,dnz,0,onz));
  MatPreallocateEnd(dnz,onz);
  /* keep a mapping already attached to J (e.g. by a caller); otherwise install the DMDA's */
  PetscCall(MatGetLocalToGlobalMapping(J,&mltog,NULL));
  if (!mltog) {
    PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));
  }

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
        for (k=zs; k<zs+nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

          slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

          cnt = 0;
          /* kk/jj/ii loop order generates the columns in increasing local index
             order, matching the MAT_SORTED_FULL hint set above */
          for (kk=kstart; kk<kend+1; kk++) {
            for (jj=jstart; jj<jend+1; jj++) {
              for (ii=istart; ii<iend+1; ii++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                  cols[cnt++] = nc*(slot + ii + gnx*jj + gnx*gny*kk);
                    for (l=1; l<nc; l++) {
                      cols[cnt] = 1 + cols[cnt-1];cnt++;
                  }
                }
              }
            }
          }
          rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
          PetscCall(MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES));
        }
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J,PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) {
      PetscCall(MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE));
    }
    PetscCall(MatBindToCPU(J,PETSC_FALSE));
    PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows,cols));
  PetscFunctionReturn(0);
}
1456 
1457 /* ---------------------------------------------------------------------------------*/
1458 
/*
   DMCreateMatrix_DA_1d_MPIAIJ_Fill - Preallocates (and, unless da->prealloc_only, fills with
   zeros) an AIJ matrix for a 1d DMDA, honoring the per-component coupling patterns set with
   DMDASetBlockFills(): dd->dfill couples components at the SAME grid point, dd->ofill couples
   components at NEIGHBORING grid points. Only stencil width s <= 1 is supported (checked below).
*/
PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J)
{
  DM_DA                  *dd = (DM_DA*)da->data;
  PetscInt               xs,nx,i,j,gxs,gnx,row,k,l;
  PetscInt               m,dim,s,*cols = NULL,nc,cnt,maxcnt = 0,*ocols;
  PetscInt               *ofill = dd->ofill,*dfill = dd->dfill;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog;
  PetscMPIInt            rank,size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)da),&rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da),&size));

  /*
         nc - number of components per grid point
         s  - stencil width (restricted to at most 1 here)
  */
  PetscCall(DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL));
  PetscCheck(s <= 1,PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Matrix creation for 1d not implemented correctly for stencil width larger than 1");
  PetscCall(DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL));
  PetscCall(DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL));

  PetscCall(MatSetBlockSize(J,nc));
  /* cols[r] counts diagonal-part nonzeros of local row r, ocols[r] the off-diagonal (off-process) part */
  PetscCall(PetscCalloc2(nx*nc,&cols,nx*nc,&ocols));

  /*
        note should be smaller for first and last process with no periodic
        does not handle dfill
  */
  cnt = 0;
  /* coupling with process to the left */
  /* NOTE(review): this counting pass dereferences dfill unconditionally while the fill pass
     below guards with "if (dfill)"; it appears to assume DMDASetBlockFills() always provided
     a diagonal fill pattern -- confirm callers guarantee that */
  for (i=0; i<s; i++) {
    for (j=0; j<nc; j++) {
      /* ofill[j+1]-ofill[j] = number of components at a neighboring point that component j couples to */
      ocols[cnt] = ((rank == 0) ? 0 : (s - i)*(ofill[j+1] - ofill[j]));
      cols[cnt]  = dfill[j+1] - dfill[j] + (s + i)*(ofill[j+1] - ofill[j]);
      if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        /* periodic wrap: the left neighbor of the first point lives at the far end of the domain */
        if (size > 1) ocols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
        else cols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
      }
      maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
      cnt++;
    }
  }
  /* interior rows: diagonal point plus 2*s neighboring points, all on this process */
  for (i=s; i<nx-s; i++) {
    for (j=0; j<nc; j++) {
      cols[cnt] = dfill[j+1] - dfill[j] + 2*s*(ofill[j+1] - ofill[j]);
      maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
      cnt++;
    }
  }
  /* coupling with process to the right */
  for (i=nx-s; i<nx; i++) {
    for (j=0; j<nc; j++) {
      ocols[cnt] = ((rank == (size-1)) ? 0 : (i - nx + s + 1)*(ofill[j+1] - ofill[j]));
      cols[cnt]  = dfill[j+1] - dfill[j] + (s + nx - i - 1)*(ofill[j+1] - ofill[j]);
      if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        /* periodic wrap: the right neighbor of the last point is at the start of the domain */
        if (size > 1) ocols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
        else cols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
      }
      maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
      cnt++;
    }
  }

  PetscCall(MatSeqAIJSetPreallocation(J,0,cols));
  PetscCall(MatMPIAIJSetPreallocation(J,0,cols,0,ocols));
  PetscCall(PetscFree2(cols,ocols));

  PetscCall(DMGetLocalToGlobalMapping(da,&ltog));
  PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* cols is reused here to hold GLOBAL column indices of one row (maxcnt is the widest row) */
    PetscCall(PetscMalloc1(maxcnt,&cols));
    row = xs*nc;
    /* coupling with process to the left */
    for (i=xs; i<xs+s; i++) {
      for (j=0; j<nc; j++) {
        cnt = 0;
        if (rank) {
          /* neighbors owned by the process to the left */
          for (l=0; l<s; l++) {
            for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
          }
        }
        if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
          /* periodic wrap to the far right end of the domain */
          for (l=0; l<s; l++) {
            for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (m + i - s - l)*nc + ofill[k];
          }
        }
        if (dfill) {
          for (k=dfill[j]; k<dfill[j+1]; k++) {
            cols[cnt++] = i*nc + dfill[k];
          }
        } else {
          /* no diagonal fill pattern: couple component j to all nc components of its own point */
          for (k=0; k<nc; k++) {
            cols[cnt++] = i*nc + k;
          }
        }
        for (l=0; l<s; l++) {
          for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
        }
        /* indices here are global, hence MatSetValues() rather than MatSetValuesLocal() */
        PetscCall(MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES));
        row++;
      }
    }
    /* interior rows: both neighbors are local */
    for (i=xs+s; i<xs+nx-s; i++) {
      for (j=0; j<nc; j++) {
        cnt = 0;
        for (l=0; l<s; l++) {
          for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
        }
        if (dfill) {
          for (k=dfill[j]; k<dfill[j+1]; k++) {
            cols[cnt++] = i*nc + dfill[k];
          }
        } else {
          for (k=0; k<nc; k++) {
            cols[cnt++] = i*nc + k;
          }
        }
        for (l=0; l<s; l++) {
          for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
        }
        PetscCall(MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES));
        row++;
      }
    }
    /* coupling with process to the right */
    for (i=xs+nx-s; i<xs+nx; i++) {
      for (j=0; j<nc; j++) {
        cnt = 0;
        for (l=0; l<s; l++) {
          for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
        }
        if (dfill) {
          for (k=dfill[j]; k<dfill[j+1]; k++) {
            cols[cnt++] = i*nc + dfill[k];
          }
        } else {
          for (k=0; k<nc; k++) {
            cols[cnt++] = i*nc + k;
          }
        }
        if (rank < size-1) {
          /* neighbors owned by the process to the right */
          for (l=0; l<s; l++) {
            for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
          }
        }
        if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
          /* periodic wrap back to the start of the domain */
          for (l=0; l<s; l++) {
            for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s - l - m + 2)*nc + ofill[k];
          }
        }
        PetscCall(MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES));
        row++;
      }
    }
    PetscCall(PetscFree(cols));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J,PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J,PETSC_FALSE));
    PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  PetscFunctionReturn(0);
}
1631 
1632 /* ---------------------------------------------------------------------------------*/
1633 
1634 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J)
1635 {
1636   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1637   PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
1638   PetscInt               istart,iend;
1639   DMBoundaryType         bx;
1640   ISLocalToGlobalMapping ltog,mltog;
1641 
1642   PetscFunctionBegin;
1643   /*
1644          nc - number of components per grid point
1645          col - number of colors needed in one direction for single component problem
1646 
1647   */
1648   PetscCall(DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL));
1649   if (bx == DM_BOUNDARY_NONE) {
1650     PetscCall(MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE));
1651   }
1652   col  = 2*s + 1;
1653 
1654   PetscCall(DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL));
1655   PetscCall(DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL));
1656 
1657   PetscCall(MatSetBlockSize(J,nc));
1658   PetscCall(MatSeqAIJSetPreallocation(J,col*nc,NULL));
1659   PetscCall(MatMPIAIJSetPreallocation(J,col*nc,NULL,col*nc,NULL));
1660 
1661   PetscCall(DMGetLocalToGlobalMapping(da,&ltog));
1662   PetscCall(MatGetLocalToGlobalMapping(J,&mltog,NULL));
1663   if (!mltog) {
1664     PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));
1665   }
1666 
1667   /*
1668     For each node in the grid: we get the neighbors in the local (on processor ordering
1669     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1670     PETSc ordering.
1671   */
1672   if (!da->prealloc_only) {
1673     PetscCall(PetscMalloc2(nc,&rows,col*nc*nc,&cols));
1674     for (i=xs; i<xs+nx; i++) {
1675       istart = PetscMax(-s,gxs - i);
1676       iend   = PetscMin(s,gxs + gnx - i - 1);
1677       slot   = i - gxs;
1678 
1679       cnt = 0;
1680       for (i1=istart; i1<iend+1; i1++) {
1681         cols[cnt++] = nc*(slot + i1);
1682         for (l=1; l<nc; l++) {
1683           cols[cnt] = 1 + cols[cnt-1];cnt++;
1684         }
1685       }
1686       rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
1687       PetscCall(MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES));
1688     }
1689     /* do not copy values to GPU since they are all zero and not yet needed there */
1690     PetscCall(MatBindToCPU(J,PETSC_TRUE));
1691     PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
1692     PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
1693     if (bx == DM_BOUNDARY_NONE) {
1694       PetscCall(MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE));
1695     }
1696     PetscCall(MatBindToCPU(J,PETSC_FALSE));
1697     PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
1698     PetscCall(PetscFree2(rows,cols));
1699   }
1700   PetscFunctionReturn(0);
1701 }
1702 
1703 /* ---------------------------------------------------------------------------------*/
1704 
1705 PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da,Mat J)
1706 {
1707   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1708   PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
1709   PetscInt               istart,iend;
1710   DMBoundaryType         bx;
1711   ISLocalToGlobalMapping ltog,mltog;
1712 
1713   PetscFunctionBegin;
1714   /*
1715          nc - number of components per grid point
1716          col - number of colors needed in one direction for single component problem
1717   */
1718   PetscCall(DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL));
1719   col  = 2*s + 1;
1720 
1721   PetscCall(DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL));
1722   PetscCall(DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL));
1723 
1724   PetscCall(MatSetBlockSize(J,nc));
1725   PetscCall(MatSeqAIJSetTotalPreallocation(J,nx*nc*col*nc));
1726 
1727   PetscCall(DMGetLocalToGlobalMapping(da,&ltog));
1728   PetscCall(MatGetLocalToGlobalMapping(J,&mltog,NULL));
1729   if (!mltog) {
1730     PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));
1731   }
1732 
1733   /*
1734     For each node in the grid: we get the neighbors in the local (on processor ordering
1735     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1736     PETSc ordering.
1737   */
1738   if (!da->prealloc_only) {
1739     PetscCall(PetscMalloc2(nc,&rows,col*nc*nc,&cols));
1740     for (i=xs; i<xs+nx; i++) {
1741       istart = PetscMax(-s,gxs - i);
1742       iend   = PetscMin(s,gxs + gnx - i - 1);
1743       slot   = i - gxs;
1744 
1745       cnt = 0;
1746       for (i1=istart; i1<iend+1; i1++) {
1747         cols[cnt++] = nc*(slot + i1);
1748         for (l=1; l<nc; l++) {
1749           cols[cnt] = 1 + cols[cnt-1];cnt++;
1750         }
1751       }
1752       rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
1753       PetscCall(MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES));
1754     }
1755     /* do not copy values to GPU since they are all zero and not yet needed there */
1756     PetscCall(MatBindToCPU(J,PETSC_TRUE));
1757     PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
1758     PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
1759     if (bx == DM_BOUNDARY_NONE) {
1760       PetscCall(MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE));
1761     }
1762     PetscCall(MatBindToCPU(J,PETSC_FALSE));
1763     PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
1764     PetscCall(PetscFree2(rows,cols));
1765   }
1766   PetscCall(MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE));
1767   PetscFunctionReturn(0);
1768 }
1769 
1770 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
1771 {
1772   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1773   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1774   PetscInt               istart,iend,jstart,jend,ii,jj;
1775   MPI_Comm               comm;
1776   PetscScalar            *values;
1777   DMBoundaryType         bx,by;
1778   DMDAStencilType        st;
1779   ISLocalToGlobalMapping ltog;
1780 
1781   PetscFunctionBegin;
1782   /*
1783      nc - number of components per grid point
1784      col - number of colors needed in one direction for single component problem
1785   */
1786   PetscCall(DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st));
1787   col  = 2*s + 1;
1788 
1789   PetscCall(DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL));
1790   PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL));
1791   PetscCall(PetscObjectGetComm((PetscObject)da,&comm));
1792 
1793   PetscCall(PetscMalloc1(col*col*nc*nc,&cols));
1794 
1795   PetscCall(DMGetLocalToGlobalMapping(da,&ltog));
1796 
1797   /* determine the matrix preallocation information */
1798   MatPreallocateBegin(comm,nx*ny,nx*ny,dnz,onz);
1799   for (i=xs; i<xs+nx; i++) {
1800     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1801     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1802     for (j=ys; j<ys+ny; j++) {
1803       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1804       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1805       slot   = i - gxs + gnx*(j - gys);
1806 
1807       /* Find block columns in block row */
1808       cnt = 0;
1809       for (ii=istart; ii<iend+1; ii++) {
1810         for (jj=jstart; jj<jend+1; jj++) {
1811           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1812             cols[cnt++] = slot + ii + gnx*jj;
1813           }
1814         }
1815       }
1816       PetscCall(MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz));
1817     }
1818   }
1819   PetscCall(MatSeqBAIJSetPreallocation(J,nc,0,dnz));
1820   PetscCall(MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz));
1821   MatPreallocateEnd(dnz,onz);
1822 
1823   PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));
1824 
1825   /*
1826     For each node in the grid: we get the neighbors in the local (on processor ordering
1827     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1828     PETSc ordering.
1829   */
1830   if (!da->prealloc_only) {
1831     PetscCall(PetscCalloc1(col*col*nc*nc,&values));
1832     for (i=xs; i<xs+nx; i++) {
1833       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1834       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1835       for (j=ys; j<ys+ny; j++) {
1836         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1837         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1838         slot = i - gxs + gnx*(j - gys);
1839         cnt  = 0;
1840         for (ii=istart; ii<iend+1; ii++) {
1841           for (jj=jstart; jj<jend+1; jj++) {
1842             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1843               cols[cnt++] = slot + ii + gnx*jj;
1844             }
1845           }
1846         }
1847         PetscCall(MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES));
1848       }
1849     }
1850     PetscCall(PetscFree(values));
1851     /* do not copy values to GPU since they are all zero and not yet needed there */
1852     PetscCall(MatBindToCPU(J,PETSC_TRUE));
1853     PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
1854     PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
1855     PetscCall(MatBindToCPU(J,PETSC_FALSE));
1856     PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
1857   }
1858   PetscCall(PetscFree(cols));
1859   PetscFunctionReturn(0);
1860 }
1861 
/*
   DMCreateMatrix_DA_3d_MPIBAIJ - Preallocates a BAIJ matrix (block size nc) for a 3d DMDA and,
   unless da->prealloc_only, inserts zero blocks so the block nonzero pattern is fixed.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
{
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
  PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by,bz;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point (the BAIJ block size)
         col - number of colors needed in one direction for single component problem

  */
  PetscCall(DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st));
  col  = 2*s + 1;

  PetscCall(DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  /* at most col^3 block columns per block row */
  PetscCall(PetscMalloc1(col*col*col,&cols));

  PetscCall(DMGetLocalToGlobalMapping(da,&ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm,nx*ny*nz,nx*ny*nz,dnz,onz);
  for (i=xs; i<xs+nx; i++) {
    /* periodic boundaries keep the full stencil; otherwise clip it at the physical domain edge */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
      for (k=zs; k<zs+nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

        /* local (ghosted) block row index of grid point (i,j,k) */
        slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

        /* Find block columns in block row */
        cnt = 0;
        for (ii=istart; ii<iend+1; ii++) {
          for (jj=jstart; jj<jend+1; jj++) {
            for (kk=kstart; kk<kend+1; kk++) {
              if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
              }
            }
          }
        }
        PetscCall(MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz));
      }
    }
  }
  PetscCall(MatSeqBAIJSetPreallocation(J,nc,0,dnz));
  PetscCall(MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz));
  MatPreallocateEnd(dnz,onz);

  PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* one zero-filled buffer large enough for a full row of nc x nc blocks */
    PetscCall(PetscCalloc1(col*col*col*nc*nc,&values));
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
        for (k=zs; k<zs+nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

          slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

          cnt = 0;
          for (ii=istart; ii<iend+1; ii++) {
            for (jj=jstart; jj<jend+1; jj++) {
              for (kk=kstart; kk<kend+1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                  cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
                }
              }
            }
          }
          PetscCall(MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J,PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J,PETSC_FALSE));
    PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(0);
}
1969 
1970 /*
1971   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
1972   identify in the local ordering with periodic domain.
1973 */
1974 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[])
1975 {
1976   PetscInt       i,n;
1977 
1978   PetscFunctionBegin;
1979   PetscCall(ISLocalToGlobalMappingApplyBlock(ltog,1,row,row));
1980   PetscCall(ISLocalToGlobalMappingApplyBlock(ltog,*cnt,col,col));
1981   for (i=0,n=0; i<*cnt; i++) {
1982     if (col[i] >= *row) col[n++] = col[i];
1983   }
1984   *cnt = n;
1985   PetscFunctionReturn(0);
1986 }
1987 
/*
   DMCreateMatrix_DA_2d_MPISBAIJ - Preallocates an SBAIJ (symmetric block) matrix for a 2d DMDA
   and, unless da->prealloc_only, inserts zero blocks for the upper-triangular part of the pattern.
   Column lists are converted to global indices and filtered by L2GFilterUpperTriangular() so only
   entries with global column >= global row are kept, as required by SBAIJ storage.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J)
{
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
  PetscInt               istart,iend,jstart,jend,ii,jj;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point (the SBAIJ block size)
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st));
  col  = 2*s + 1;

  PetscCall(DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  PetscCall(PetscMalloc1(col*col*nc*nc,&cols));

  PetscCall(DMGetLocalToGlobalMapping(da,&ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm,nx*ny,nx*ny,dnz,onz);
  for (i=xs; i<xs+nx; i++) {
    /* periodic boundaries keep the full stencil; otherwise clip it at the physical domain edge */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
      slot   = i - gxs + gnx*(j - gys); /* local (ghosted) block row */

      /* Find block columns in block row */
      cnt = 0;
      for (ii=istart; ii<iend+1; ii++) {
        for (jj=jstart; jj<jend+1; jj++) {
          if (st == DMDA_STENCIL_BOX || !ii || !jj) {
            cols[cnt++] = slot + ii + gnx*jj;
          }
        }
      }
      /* slot and cols become GLOBAL indices here; lower-triangular columns are dropped */
      PetscCall(L2GFilterUpperTriangular(ltog,&slot,&cnt,cols));
      PetscCall(MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz));
    }
  }
  PetscCall(MatSeqSBAIJSetPreallocation(J,nc,0,dnz));
  PetscCall(MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz));
  MatPreallocateEnd(dnz,onz);

  PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscCalloc1(col*col*nc*nc,&values));
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
        slot   = i - gxs + gnx*(j - gys);

        /* Find block columns in block row */
        cnt = 0;
        for (ii=istart; ii<iend+1; ii++) {
          for (jj=jstart; jj<jend+1; jj++) {
            if (st == DMDA_STENCIL_BOX || !ii || !jj) {
              cols[cnt++] = slot + ii + gnx*jj;
            }
          }
        }
        /* indices are global after filtering, hence MatSetValuesBlocked() (not ...Local) */
        PetscCall(L2GFilterUpperTriangular(ltog,&slot,&cnt,cols));
        PetscCall(MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J,PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J,PETSC_FALSE));
    PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(0);
}
2083 
/*
   DMCreateMatrix_DA_3d_MPISBAIJ - Preallocates an SBAIJ (symmetric block) matrix for a 3d DMDA
   and, unless da->prealloc_only, inserts zero blocks for the upper-triangular part of the pattern.
   Column lists are converted to global indices and filtered by L2GFilterUpperTriangular() so only
   entries with global column >= global row are kept, as required by SBAIJ storage.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J)
{
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
  PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by,bz;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point (the SBAIJ block size)
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st));
  col  = 2*s + 1;

  PetscCall(DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  /* create the matrix */
  PetscCall(PetscMalloc1(col*col*col,&cols));

  PetscCall(DMGetLocalToGlobalMapping(da,&ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm,nx*ny*nz,nx*ny*nz,dnz,onz);
  for (i=xs; i<xs+nx; i++) {
    /* periodic boundaries keep the full stencil; otherwise clip it at the physical domain edge */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
      for (k=zs; k<zs+nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

        /* local (ghosted) block row index of grid point (i,j,k) */
        slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

        /* Find block columns in block row */
        cnt = 0;
        for (ii=istart; ii<iend+1; ii++) {
          for (jj=jstart; jj<jend+1; jj++) {
            for (kk=kstart; kk<kend+1; kk++) {
              /* BOX keeps everything; STAR keeps only offsets lying on a coordinate axis */
              if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
                cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
              }
            }
          }
        }
        /* slot and cols become GLOBAL indices here; lower-triangular columns are dropped */
        PetscCall(L2GFilterUpperTriangular(ltog,&slot,&cnt,cols));
        PetscCall(MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz));
      }
    }
  }
  PetscCall(MatSeqSBAIJSetPreallocation(J,nc,0,dnz));
  PetscCall(MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz));
  MatPreallocateEnd(dnz,onz);

  PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* one zero-filled buffer large enough for a full row of nc x nc blocks */
    PetscCall(PetscCalloc1(col*col*col*nc*nc,&values));
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
        for (k=zs; k<zs+nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

          slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

          cnt = 0;
          for (ii=istart; ii<iend+1; ii++) {
            for (jj=jstart; jj<jend+1; jj++) {
              for (kk=kstart; kk<kend+1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
                  cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
                }
              }
            }
          }
          /* indices are global after filtering, hence MatSetValuesBlocked() (not ...Local) */
          PetscCall(L2GFilterUpperTriangular(ltog,&slot,&cnt,cols));
          PetscCall(MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J,PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J,PETSC_FALSE));
    PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(0);
}
2193 
2194 /* ---------------------------------------------------------------------------------*/
2195 
/*
   Builds the nonzero structure of the Jacobian matrix J for a 3d DMDA when the
   user has provided per-component fill patterns (dd->dfill for the diagonal
   block, dd->ofill for the off-diagonal blocks) via DMDASetBlockFills().

   Two passes over the owned grid points are made with identical loop structure:
     1) count nonzeros per row (dnz/onz) to preallocate J exactly, and
     2) insert explicit zeros with MatSetValuesLocal() so the nonzero pattern
        is locked in (MAT_NEW_NONZERO_LOCATION_ERR is then set).

   Collective on da.

   Input Parameters:
+  da - the DMDA (3d) providing grid sizes, stencil, and boundary information
-  J  - the matrix to preallocate and (unless da->prealloc_only) assemble with zeros

   Notes:
   dfill/ofill use the modified-CSR layout produced by DMDASetBlockFills_Private():
   entries fill[l] .. fill[l+1]-1 are the column components present in row
   component l.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J)
{
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt, maxcnt = 0,l,p,*dnz,*onz;
  PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
  DM_DA                  *dd = (DM_DA*)da->data;
  PetscInt               ifill_col,*dfill = dd->dfill,*ofill = dd->ofill;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by,bz;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  PetscCall(DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st));
  col  = 2*s + 1;
  /* periodic wrap-around requires the grid size to be a multiple of the stencil span
     in each periodic direction, otherwise coloring would be inefficient */
  PetscCheck(bx != DM_BOUNDARY_PERIODIC || (m % col) == 0,PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
                 by 2*stencil_width + 1\n");
  PetscCheck(by != DM_BOUNDARY_PERIODIC || (n % col) == 0,PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
                 by 2*stencil_width + 1\n");
  PetscCheck(bz != DM_BOUNDARY_PERIODIC || (p % col) == 0,PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
                 by 2*stencil_width + 1\n");

  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
  if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;

  PetscCall(DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  /* cols holds, for one row at a time, every possible column index in the full
     box stencil: col^3 stencil points times nc components each */
  PetscCall(PetscMalloc1(col*col*col*nc,&cols));
  PetscCall(DMGetLocalToGlobalMapping(da,&ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);

  PetscCall(MatSetBlockSize(J,nc));
  /* Pass 1: for every owned grid point (i,j,k) and component l, enumerate the
     columns its row will contain and accumulate diagonal/off-diagonal counts */
  for (i=xs; i<xs+nx; i++) {
    /* clip the stencil at physical boundaries; periodic directions keep full width */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
      for (k=zs; k<zs+nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

        /* local (ghosted) natural index of grid point (i,j,k) */
        slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

        for (l=0; l<nc; l++) {
          cnt = 0;
          for (ii=istart; ii<iend+1; ii++) {
            for (jj=jstart; jj<jend+1; jj++) {
              for (kk=kstart; kk<kend+1; kk++) {
                if (ii || jj || kk) {
                  /* neighbor grid point: use ofill; star stencil keeps only
                     offsets lying on a coordinate axis */
                  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                    for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                  }
                } else {
                  /* the grid point itself: use dfill if given, else couple all components */
                  if (dfill) {
                    for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                  } else {
                    for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                  }
                }
              }
            }
          }
          row  = l + nc*(slot);
          /* track the widest row so pass 2 can allocate one zero-filled values buffer */
          maxcnt = PetscMax(maxcnt,cnt);
          if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz));
          else PetscCall(MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz));
        }
      }
    }
  }
  /* only one of these takes effect depending on whether J is Seq or MPI AIJ */
  PetscCall(MatSeqAIJSetPreallocation(J,0,dnz));
  PetscCall(MatMPIAIJSetPreallocation(J,0,dnz,0,onz));
  MatPreallocateEnd(dnz,onz);
  PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* Pass 2: insert zeros in every preallocated location so the nonzero
       pattern is fixed; the loop nest mirrors pass 1 exactly */
    PetscCall(PetscCalloc1(maxcnt,&values));
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
        for (k=zs; k<zs+nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

          slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

          for (l=0; l<nc; l++) {
            cnt = 0;
            for (ii=istart; ii<iend+1; ii++) {
              for (jj=jstart; jj<jend+1; jj++) {
                for (kk=kstart; kk<kend+1; kk++) {
                  if (ii || jj || kk) {
                    if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                      for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                    }
                  } else {
                    if (dfill) {
                      for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                    } else {
                      for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                    }
                  }
                }
              }
            }
            row  = l + nc*(slot);
            PetscCall(MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES));
          }
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J,PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J,PETSC_FALSE));
    /* any future insertion outside the preallocated pattern is now an error */
    PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(0);
}
2343