xref: /petsc/src/dm/impls/da/fdda.c (revision 2da392cc7c10228af19ad9843ce5155178acb644)
1 
2 #include <petsc/private/dmdaimpl.h> /*I      "petscdmda.h"     I*/
3 #include <petscmat.h>
4 
5 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring*);
6 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring*);
7 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring*);
8 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring*);
9 
10 /*
11    For ghost i that may be negative or greater than the upper bound this
12   maps it into the 0:m-1 range using periodicity
13 */
14 #define SetInRange(i,m) ((i < 0) ? m+i : ((i >= m) ? i-m : i))
15 
16 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill,PetscInt w,PetscInt **rfill)
17 {
18   PetscErrorCode ierr;
19   PetscInt       i,j,nz,*fill;
20 
21   PetscFunctionBegin;
22   if (!dfill) PetscFunctionReturn(0);
23 
24   /* count number nonzeros */
25   nz = 0;
26   for (i=0; i<w; i++) {
27     for (j=0; j<w; j++) {
28       if (dfill[w*i+j]) nz++;
29     }
30   }
31   ierr = PetscMalloc1(nz + w + 1,&fill);CHKERRQ(ierr);
32   /* construct modified CSR storage of nonzero structure */
33   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
34    so fill[1] - fill[0] gives number of nonzeros in first row etc */
35   nz = w + 1;
36   for (i=0; i<w; i++) {
37     fill[i] = nz;
38     for (j=0; j<w; j++) {
39       if (dfill[w*i+j]) {
40         fill[nz] = j;
41         nz++;
42       }
43     }
44   }
45   fill[w] = nz;
46 
47   *rfill = fill;
48   PetscFunctionReturn(0);
49 }
50 
51 
52 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse,PetscInt w,PetscInt **rfill)
53 {
54   PetscErrorCode ierr;
55   PetscInt       nz;
56 
57   PetscFunctionBegin;
58   if (!dfillsparse) PetscFunctionReturn(0);
59 
60   /* Determine number of non-zeros */
61   nz = (dfillsparse[w] - w - 1);
62 
63   /* Allocate space for our copy of the given sparse matrix representation. */
64   ierr = PetscMalloc1(nz + w + 1,rfill);CHKERRQ(ierr);
65   ierr = PetscArraycpy(*rfill,dfillsparse,nz+w+1);CHKERRQ(ierr);
66   PetscFunctionReturn(0);
67 }
68 
69 
70 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
71 {
72   PetscErrorCode ierr;
73   PetscInt       i,k,cnt = 1;
74 
75   PetscFunctionBegin;
76 
77   /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of
78    columns to the left with any nonzeros in them plus 1 */
79   ierr = PetscCalloc1(dd->w,&dd->ofillcols);CHKERRQ(ierr);
80   for (i=0; i<dd->w; i++) {
81     for (k=dd->ofill[i]; k<dd->ofill[i+1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
82   }
83   for (i=0; i<dd->w; i++) {
84     if (dd->ofillcols[i]) {
85       dd->ofillcols[i] = cnt++;
86     }
87   }
88   PetscFunctionReturn(0);
89 }
90 
91 
92 
93 /*@
94     DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
95     of the matrix returned by DMCreateMatrix().
96 
97     Logically Collective on da
98 
99     Input Parameter:
100 +   da - the distributed array
101 .   dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
102 -   ofill - the fill pattern in the off-diagonal blocks
103 
104 
105     Level: developer
106 
107     Notes:
108     This only makes sense when you are doing multicomponent problems but using the
109        MPIAIJ matrix format
110 
111            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
112        representing coupling and 0 entries for missing coupling. For example
113 $             dfill[9] = {1, 0, 0,
114 $                         1, 1, 0,
115 $                         0, 1, 1}
116        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
117        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
118        diagonal block).
119 
120      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
121      can be represented in the dfill, ofill format
122 
123    Contributed by Glenn Hammond
124 
125 .seealso DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()
126 
127 @*/
128 PetscErrorCode  DMDASetBlockFills(DM da,const PetscInt *dfill,const PetscInt *ofill)
129 {
130   DM_DA          *dd = (DM_DA*)da->data;
131   PetscErrorCode ierr;
132 
133   PetscFunctionBegin;
134   /* save the given dfill and ofill information */
135   ierr = DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);CHKERRQ(ierr);
136   ierr = DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);CHKERRQ(ierr);
137 
138   /* count nonzeros in ofill columns */
139   ierr = DMDASetBlockFills_Private2(dd);CHKERRQ(ierr);
140 
141   PetscFunctionReturn(0);
142 }
143 
144 
145 /*@
146     DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
147     of the matrix returned by DMCreateMatrix(), using sparse representations
148     of fill patterns.
149 
150     Logically Collective on da
151 
152     Input Parameter:
153 +   da - the distributed array
154 .   dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block)
155 -   ofill - the sparse fill pattern in the off-diagonal blocks
156 
157 
158     Level: developer
159 
160     Notes: This only makes sense when you are doing multicomponent problems but using the
161        MPIAIJ matrix format
162 
163            The format for dfill and ofill is a sparse representation of a
164            dof-by-dof matrix with 1 entries representing coupling and 0 entries
165            for missing coupling.  The sparse representation is a 1 dimensional
166            array of length nz + dof + 1, where nz is the number of non-zeros in
167            the matrix.  The first dof entries in the array give the
168            starting array indices of each row's items in the rest of the array,
169            the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
170            and the remaining nz items give the column indices of each of
171            the 1s within the logical 2D matrix.  Each row's items within
172            the array are the column indices of the 1s within that row
173            of the 2D matrix.  PETSc developers may recognize that this is the
174            same format as that computed by the DMDASetBlockFills_Private()
175            function from a dense 2D matrix representation.
176 
177      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
178      can be represented in the dfill, ofill format
179 
180    Contributed by Philip C. Roth
181 
182 .seealso DMDASetBlockFills(), DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()
183 
184 @*/
185 PetscErrorCode  DMDASetBlockFillsSparse(DM da,const PetscInt *dfillsparse,const PetscInt *ofillsparse)
186 {
187   DM_DA          *dd = (DM_DA*)da->data;
188   PetscErrorCode ierr;
189 
190   PetscFunctionBegin;
191   /* save the given dfill and ofill information */
192   ierr = DMDASetBlockFillsSparse_Private(dfillsparse,dd->w,&dd->dfill);CHKERRQ(ierr);
193   ierr = DMDASetBlockFillsSparse_Private(ofillsparse,dd->w,&dd->ofill);CHKERRQ(ierr);
194 
195   /* count nonzeros in ofill columns */
196   ierr = DMDASetBlockFills_Private2(dd);CHKERRQ(ierr);
197 
198   PetscFunctionReturn(0);
199 }
200 
201 
202 PetscErrorCode  DMCreateColoring_DA(DM da,ISColoringType ctype,ISColoring *coloring)
203 {
204   PetscErrorCode   ierr;
205   PetscInt         dim,m,n,p,nc;
206   DMBoundaryType   bx,by,bz;
207   MPI_Comm         comm;
208   PetscMPIInt      size;
209   PetscBool        isBAIJ;
210   DM_DA            *dd = (DM_DA*)da->data;
211 
212   PetscFunctionBegin;
213   /*
214                                   m
215           ------------------------------------------------------
216          |                                                     |
217          |                                                     |
218          |               ----------------------                |
219          |               |                    |                |
220       n  |           yn  |                    |                |
221          |               |                    |                |
222          |               .---------------------                |
223          |             (xs,ys)     xn                          |
224          |            .                                        |
225          |         (gxs,gys)                                   |
226          |                                                     |
227           -----------------------------------------------------
228   */
229 
230   /*
231          nc - number of components per grid point
232          col - number of colors needed in one direction for single component problem
233 
234   */
235   ierr = DMDAGetInfo(da,&dim,NULL,NULL,NULL,&m,&n,&p,&nc,NULL,&bx,&by,&bz,NULL);CHKERRQ(ierr);
236 
237   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
238   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
239   if (ctype == IS_COLORING_LOCAL) {
240     if (size == 1) {
241       ctype = IS_COLORING_GLOBAL;
242     } else if (dim > 1) {
243       if ((m==1 && bx == DM_BOUNDARY_PERIODIC) || (n==1 && by == DM_BOUNDARY_PERIODIC) || (p==1 && bz == DM_BOUNDARY_PERIODIC)) {
244         SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain  on the same process");
245       }
246     }
247   }
248 
249   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
250      matrices is for the blocks, not the individual matrix elements  */
251   ierr = PetscStrbeginswith(da->mattype,MATBAIJ,&isBAIJ);CHKERRQ(ierr);
252   if (!isBAIJ) {ierr = PetscStrbeginswith(da->mattype,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);}
253   if (!isBAIJ) {ierr = PetscStrbeginswith(da->mattype,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);}
254   if (isBAIJ) {
255     dd->w  = 1;
256     dd->xs = dd->xs/nc;
257     dd->xe = dd->xe/nc;
258     dd->Xs = dd->Xs/nc;
259     dd->Xe = dd->Xe/nc;
260   }
261 
262   /*
263      We do not provide a getcoloring function in the DMDA operations because
264    the basic DMDA does not know about matrices. We think of DMDA as being
265    more low-level then matrices.
266   */
267   if (dim == 1) {
268     ierr = DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
269   } else if (dim == 2) {
270     ierr = DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
271   } else if (dim == 3) {
272     ierr = DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
273   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
274   if (isBAIJ) {
275     dd->w  = nc;
276     dd->xs = dd->xs*nc;
277     dd->xe = dd->xe*nc;
278     dd->Xs = dd->Xs*nc;
279     dd->Xe = dd->Xe*nc;
280   }
281   PetscFunctionReturn(0);
282 }
283 
284 /* ---------------------------------------------------------------------------------*/
285 
286 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
287 {
288   PetscErrorCode  ierr;
289   PetscInt        xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
290   PetscInt        ncolors;
291   MPI_Comm        comm;
292   DMBoundaryType  bx,by;
293   DMDAStencilType st;
294   ISColoringValue *colors;
295   DM_DA           *dd = (DM_DA*)da->data;
296 
297   PetscFunctionBegin;
298   /*
299          nc - number of components per grid point
300          col - number of colors needed in one direction for single component problem
301 
302   */
303   ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,&M,&N,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
304   col  = 2*s + 1;
305   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
306   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
307   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
308 
309   /* special case as taught to us by Paul Hovland */
310   if (st == DMDA_STENCIL_STAR && s == 1) {
311     ierr = DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
312   } else {
313 
314     if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X (%d) is divisible\n\
315                                                             by 2*stencil_width + 1 (%d)\n", m, col);
316     if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y (%d) is divisible\n\
317                                                             by 2*stencil_width + 1 (%d)\n", n, col);
318     if (ctype == IS_COLORING_GLOBAL) {
319       if (!dd->localcoloring) {
320         ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr);
321         ii   = 0;
322         for (j=ys; j<ys+ny; j++) {
323           for (i=xs; i<xs+nx; i++) {
324             for (k=0; k<nc; k++) {
325               colors[ii++] = k + nc*((i % col) + col*(j % col));
326             }
327           }
328         }
329         ncolors = nc + nc*(col-1 + col*(col-1));
330         ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
331       }
332       *coloring = dd->localcoloring;
333     } else if (ctype == IS_COLORING_LOCAL) {
334       if (!dd->ghostedcoloring) {
335         ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr);
336         ii   = 0;
337         for (j=gys; j<gys+gny; j++) {
338           for (i=gxs; i<gxs+gnx; i++) {
339             for (k=0; k<nc; k++) {
340               /* the complicated stuff is to handle periodic boundaries */
341               colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
342             }
343           }
344         }
345         ncolors = nc + nc*(col - 1 + col*(col-1));
346         ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
347         /* PetscIntView(ncolors,(PetscInt*)colors,0); */
348 
349         ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
350       }
351       *coloring = dd->ghostedcoloring;
352     } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
353   }
354   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
355   PetscFunctionReturn(0);
356 }
357 
358 /* ---------------------------------------------------------------------------------*/
359 
360 PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
361 {
362   PetscErrorCode  ierr;
363   PetscInt        xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
364   PetscInt        ncolors;
365   MPI_Comm        comm;
366   DMBoundaryType  bx,by,bz;
367   DMDAStencilType st;
368   ISColoringValue *colors;
369   DM_DA           *dd = (DM_DA*)da->data;
370 
371   PetscFunctionBegin;
372   /*
373          nc - number of components per grid point
374          col - number of colors needed in one direction for single component problem
375 
376   */
377   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
378   col  = 2*s + 1;
379   if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
380                                                          by 2*stencil_width + 1\n");
381   if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
382                                                          by 2*stencil_width + 1\n");
383   if (bz == DM_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
384                                                          by 2*stencil_width + 1\n");
385 
386   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
387   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
388   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
389 
390   /* create the coloring */
391   if (ctype == IS_COLORING_GLOBAL) {
392     if (!dd->localcoloring) {
393       ierr = PetscMalloc1(nc*nx*ny*nz,&colors);CHKERRQ(ierr);
394       ii   = 0;
395       for (k=zs; k<zs+nz; k++) {
396         for (j=ys; j<ys+ny; j++) {
397           for (i=xs; i<xs+nx; i++) {
398             for (l=0; l<nc; l++) {
399               colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
400             }
401           }
402         }
403       }
404       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
405       ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
406     }
407     *coloring = dd->localcoloring;
408   } else if (ctype == IS_COLORING_LOCAL) {
409     if (!dd->ghostedcoloring) {
410       ierr = PetscMalloc1(nc*gnx*gny*gnz,&colors);CHKERRQ(ierr);
411       ii   = 0;
412       for (k=gzs; k<gzs+gnz; k++) {
413         for (j=gys; j<gys+gny; j++) {
414           for (i=gxs; i<gxs+gnx; i++) {
415             for (l=0; l<nc; l++) {
416               /* the complicated stuff is to handle periodic boundaries */
417               colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
418             }
419           }
420         }
421       }
422       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
423       ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
424       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
425     }
426     *coloring = dd->ghostedcoloring;
427   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
428   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
429   PetscFunctionReturn(0);
430 }
431 
432 /* ---------------------------------------------------------------------------------*/
433 
434 PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
435 {
436   PetscErrorCode  ierr;
437   PetscInt        xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
438   PetscInt        ncolors;
439   MPI_Comm        comm;
440   DMBoundaryType  bx;
441   ISColoringValue *colors;
442   DM_DA           *dd = (DM_DA*)da->data;
443 
444   PetscFunctionBegin;
445   /*
446          nc - number of components per grid point
447          col - number of colors needed in one direction for single component problem
448 
449   */
450   ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,&M,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr);
451   col  = 2*s + 1;
452 
453   if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points %d is divisible\n\
454                                                           by 2*stencil_width + 1 %d\n",(int)m,(int)col);
455 
456   ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr);
457   ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr);
458   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
459 
460   /* create the coloring */
461   if (ctype == IS_COLORING_GLOBAL) {
462     if (!dd->localcoloring) {
463       ierr = PetscMalloc1(nc*nx,&colors);CHKERRQ(ierr);
464       if (dd->ofillcols) {
465         PetscInt tc = 0;
466         for (i=0; i<nc; i++) tc += (PetscInt) (dd->ofillcols[i] > 0);
467         i1 = 0;
468         for (i=xs; i<xs+nx; i++) {
469           for (l=0; l<nc; l++) {
470             if (dd->ofillcols[l] && (i % col)) {
471               colors[i1++] =  nc - 1 + tc*((i % col) - 1) + dd->ofillcols[l];
472             } else {
473               colors[i1++] = l;
474             }
475           }
476         }
477         ncolors = nc + 2*s*tc;
478       } else {
479         i1 = 0;
480         for (i=xs; i<xs+nx; i++) {
481           for (l=0; l<nc; l++) {
482             colors[i1++] = l + nc*(i % col);
483           }
484         }
485         ncolors = nc + nc*(col-1);
486       }
487       ierr = ISColoringCreate(comm,ncolors,nc*nx,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
488     }
489     *coloring = dd->localcoloring;
490   } else if (ctype == IS_COLORING_LOCAL) {
491     if (!dd->ghostedcoloring) {
492       ierr = PetscMalloc1(nc*gnx,&colors);CHKERRQ(ierr);
493       i1   = 0;
494       for (i=gxs; i<gxs+gnx; i++) {
495         for (l=0; l<nc; l++) {
496           /* the complicated stuff is to handle periodic boundaries */
497           colors[i1++] = l + nc*(SetInRange(i,m) % col);
498         }
499       }
500       ncolors = nc + nc*(col-1);
501       ierr    = ISColoringCreate(comm,ncolors,nc*gnx,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
502       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
503     }
504     *coloring = dd->ghostedcoloring;
505   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
506   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
507   PetscFunctionReturn(0);
508 }
509 
510 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
511 {
512   PetscErrorCode  ierr;
513   PetscInt        xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
514   PetscInt        ncolors;
515   MPI_Comm        comm;
516   DMBoundaryType  bx,by;
517   ISColoringValue *colors;
518   DM_DA           *dd = (DM_DA*)da->data;
519 
520   PetscFunctionBegin;
521   /*
522          nc - number of components per grid point
523          col - number of colors needed in one direction for single component problem
524 
525   */
526   ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,NULL);CHKERRQ(ierr);
527   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
528   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
529   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
530 
531   if (bx == DM_BOUNDARY_PERIODIC && (m % 5)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible by 5\n");
532   if (by == DM_BOUNDARY_PERIODIC && (n % 5)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible by 5\n");
533 
534   /* create the coloring */
535   if (ctype == IS_COLORING_GLOBAL) {
536     if (!dd->localcoloring) {
537       ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr);
538       ii   = 0;
539       for (j=ys; j<ys+ny; j++) {
540         for (i=xs; i<xs+nx; i++) {
541           for (k=0; k<nc; k++) {
542             colors[ii++] = k + nc*((3*j+i) % 5);
543           }
544         }
545       }
546       ncolors = 5*nc;
547       ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
548     }
549     *coloring = dd->localcoloring;
550   } else if (ctype == IS_COLORING_LOCAL) {
551     if (!dd->ghostedcoloring) {
552       ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr);
553       ii = 0;
554       for (j=gys; j<gys+gny; j++) {
555         for (i=gxs; i<gxs+gnx; i++) {
556           for (k=0; k<nc; k++) {
557             colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
558           }
559         }
560       }
561       ncolors = 5*nc;
562       ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
563       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
564     }
565     *coloring = dd->ghostedcoloring;
566   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
567   PetscFunctionReturn(0);
568 }
569 
570 /* =========================================================================== */
571 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat,PetscBool);
572 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM,Mat);
573 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat,PetscBool);
574 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat);
575 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat,PetscBool);
576 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat);
577 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat);
578 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat);
579 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat);
580 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat);
581 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM,Mat);
582 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM,Mat);
583 extern PetscErrorCode DMCreateMatrix_DA_IS(DM,Mat);
584 
585 /*@C
586    MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix
587 
588    Logically Collective on mat
589 
590    Input Parameters:
591 +  mat - the matrix
592 -  da - the da
593 
594    Level: intermediate
595 
596 @*/
597 PetscErrorCode MatSetupDM(Mat mat,DM da)
598 {
599   PetscErrorCode ierr;
600 
601   PetscFunctionBegin;
602   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
603   PetscValidHeaderSpecificType(da,DM_CLASSID,1,DMDA);
604   ierr = PetscTryMethod(mat,"MatSetupDM_C",(Mat,DM),(mat,da));CHKERRQ(ierr);
605   PetscFunctionReturn(0);
606 }
607 
608 PetscErrorCode  MatView_MPI_DA(Mat A,PetscViewer viewer)
609 {
610   DM                da;
611   PetscErrorCode    ierr;
612   const char        *prefix;
613   Mat               Anatural;
614   AO                ao;
615   PetscInt          rstart,rend,*petsc,i;
616   IS                is;
617   MPI_Comm          comm;
618   PetscViewerFormat format;
619 
620   PetscFunctionBegin;
621   /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
622   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
623   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0);
624 
625   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
626   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
627   if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
628 
629   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
630   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
631   ierr = PetscMalloc1(rend-rstart,&petsc);CHKERRQ(ierr);
632   for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
633   ierr = AOApplicationToPetsc(ao,rend-rstart,petsc);CHKERRQ(ierr);
634   ierr = ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
635 
636   /* call viewer on natural ordering */
637   ierr = MatCreateSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);CHKERRQ(ierr);
638   ierr = ISDestroy(&is);CHKERRQ(ierr);
639   ierr = PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);CHKERRQ(ierr);
640   ierr = PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);CHKERRQ(ierr);
641   ierr = PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);CHKERRQ(ierr);
642   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
643   ierr = MatView(Anatural,viewer);CHKERRQ(ierr);
644   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
645   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
646   PetscFunctionReturn(0);
647 }
648 
649 PetscErrorCode  MatLoad_MPI_DA(Mat A,PetscViewer viewer)
650 {
651   DM             da;
652   PetscErrorCode ierr;
653   Mat            Anatural,Aapp;
654   AO             ao;
655   PetscInt       rstart,rend,*app,i,m,n,M,N;
656   IS             is;
657   MPI_Comm       comm;
658 
659   PetscFunctionBegin;
660   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
661   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
662   if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
663 
664   /* Load the matrix in natural ordering */
665   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Anatural);CHKERRQ(ierr);
666   ierr = MatSetType(Anatural,((PetscObject)A)->type_name);CHKERRQ(ierr);
667   ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr);
668   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
669   ierr = MatSetSizes(Anatural,m,n,M,N);CHKERRQ(ierr);
670   ierr = MatLoad(Anatural,viewer);CHKERRQ(ierr);
671 
672   /* Map natural ordering to application ordering and create IS */
673   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
674   ierr = MatGetOwnershipRange(Anatural,&rstart,&rend);CHKERRQ(ierr);
675   ierr = PetscMalloc1(rend-rstart,&app);CHKERRQ(ierr);
676   for (i=rstart; i<rend; i++) app[i-rstart] = i;
677   ierr = AOPetscToApplication(ao,rend-rstart,app);CHKERRQ(ierr);
678   ierr = ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
679 
680   /* Do permutation and replace header */
681   ierr = MatCreateSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);CHKERRQ(ierr);
682   ierr = MatHeaderReplace(A,&Aapp);CHKERRQ(ierr);
683   ierr = ISDestroy(&is);CHKERRQ(ierr);
684   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
685   PetscFunctionReturn(0);
686 }
687 
688 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
689 {
690   PetscErrorCode ierr;
691   PetscInt       dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
692   Mat            A;
693   MPI_Comm       comm;
694   MatType        Atype;
695   void           (*aij)(void)=NULL,(*baij)(void)=NULL,(*sbaij)(void)=NULL,(*sell)(void)=NULL,(*is)(void)=NULL;
696   MatType        mtype;
697   PetscMPIInt    size;
698   DM_DA          *dd = (DM_DA*)da->data;
699 
700   PetscFunctionBegin;
701   ierr = MatInitializePackage();CHKERRQ(ierr);
702   mtype = da->mattype;
703 
704   /*
705                                   m
706           ------------------------------------------------------
707          |                                                     |
708          |                                                     |
709          |               ----------------------                |
710          |               |                    |                |
711       n  |           ny  |                    |                |
712          |               |                    |                |
713          |               .---------------------                |
714          |             (xs,ys)     nx                          |
715          |            .                                        |
716          |         (gxs,gys)                                   |
717          |                                                     |
718           -----------------------------------------------------
719   */
720 
721   /*
722          nc - number of components per grid point
723          col - number of colors needed in one direction for single component problem
724 
725   */
726   M   = dd->M;
727   N   = dd->N;
728   P   = dd->P;
729   dim = da->dim;
730   dof = dd->w;
731   /* ierr = DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr); */
732   ierr = DMDAGetCorners(da,NULL,NULL,NULL,&nx,&ny,&nz);CHKERRQ(ierr);
733   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
734   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
735   ierr = MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);CHKERRQ(ierr);
736   ierr = MatSetType(A,mtype);CHKERRQ(ierr);
737   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
738   ierr = MatSetDM(A,da);CHKERRQ(ierr);
739   if (da->structure_only) {
740     ierr = MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE);CHKERRQ(ierr);
741   }
742   ierr = MatGetType(A,&Atype);CHKERRQ(ierr);
743   /*
744      We do not provide a getmatrix function in the DMDA operations because
745    the basic DMDA does not know about matrices. We think of DMDA as being more
746    more low-level than matrices. This is kind of cheating but, cause sometimes
747    we think of DMDA has higher level than matrices.
748 
749      We could switch based on Atype (or mtype), but we do not since the
750    specialized setting routines depend only on the particular preallocation
751    details of the matrix, not the type itself.
752   */
753   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
754   if (!aij) {
755     ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
756   }
757   if (!aij) {
758     ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
759     if (!baij) {
760       ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
761     }
762     if (!baij) {
763       ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
764       if (!sbaij) {
765         ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
766       }
767       if (!sbaij) {
768         ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISELLSetPreallocation_C",&sell);CHKERRQ(ierr);
769         if (!sell) {
770           ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",&sell);CHKERRQ(ierr);
771         }
772       }
773       if (!sell) {
774         ierr = PetscObjectQueryFunction((PetscObject)A,"MatISSetPreallocation_C",&is);CHKERRQ(ierr);
775       }
776     }
777   }
778   if (aij) {
779     if (dim == 1) {
780       if (dd->ofill) {
781         ierr = DMCreateMatrix_DA_1d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
782       } else {
783         ierr = DMCreateMatrix_DA_1d_MPIAIJ(da,A,PETSC_FALSE);CHKERRQ(ierr);
784       }
785     } else if (dim == 2) {
786       if (dd->ofill) {
787         ierr = DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
788       } else {
789         ierr = DMCreateMatrix_DA_2d_MPIAIJ(da,A,PETSC_FALSE);CHKERRQ(ierr);
790       }
791     } else if (dim == 3) {
792       if (dd->ofill) {
793         ierr = DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
794       } else {
795         ierr = DMCreateMatrix_DA_3d_MPIAIJ(da,A,PETSC_FALSE);CHKERRQ(ierr);
796       }
797     }
798   } else if (baij) {
799     if (dim == 2) {
800       ierr = DMCreateMatrix_DA_2d_MPIBAIJ(da,A);CHKERRQ(ierr);
801     } else if (dim == 3) {
802       ierr = DMCreateMatrix_DA_3d_MPIBAIJ(da,A);CHKERRQ(ierr);
803     } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
804   } else if (sbaij) {
805     if (dim == 2) {
806       ierr = DMCreateMatrix_DA_2d_MPISBAIJ(da,A);CHKERRQ(ierr);
807     } else if (dim == 3) {
808       ierr = DMCreateMatrix_DA_3d_MPISBAIJ(da,A);CHKERRQ(ierr);
809     } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
810   } else if (sell) {
811      if (dim == 2) {
812        ierr = DMCreateMatrix_DA_2d_MPISELL(da,A);CHKERRQ(ierr);
813      } else if (dim == 3) {
814        ierr = DMCreateMatrix_DA_3d_MPISELL(da,A);CHKERRQ(ierr);
815      } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
816   } else if (is) {
817     ierr = DMCreateMatrix_DA_IS(da,A);CHKERRQ(ierr);
818   } else {
819     ISLocalToGlobalMapping ltog;
820 
821     ierr = MatSetBlockSize(A,dof);CHKERRQ(ierr);
822     ierr = MatSetUp(A);CHKERRQ(ierr);
823     ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
824     ierr = MatSetLocalToGlobalMapping(A,ltog,ltog);CHKERRQ(ierr);
825   }
826   ierr = DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);CHKERRQ(ierr);
827   ierr = MatSetStencil(A,dim,dims,starts,dof);CHKERRQ(ierr);
828   ierr = MatSetDM(A,da);CHKERRQ(ierr);
829   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
830   if (size > 1) {
831     /* change viewer to display matrix in natural ordering */
832     ierr = MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA);CHKERRQ(ierr);
833     ierr = MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA);CHKERRQ(ierr);
834   }
835   *J = A;
836   PetscFunctionReturn(0);
837 }
838 
839 /* ---------------------------------------------------------------------------------*/
840 PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
841 
842 PetscErrorCode DMCreateMatrix_DA_IS(DM dm,Mat J)
843 {
844   DM_DA                  *da = (DM_DA*)dm->data;
845   Mat                    lJ;
846   ISLocalToGlobalMapping ltog;
847   IS                     is_loc_filt, is_glob;
848   const PetscInt         *e_loc,*idx;
849   PetscInt               nel,nen,nv,dof,dim,*gidx,nb;
850   PetscBool              flg;
851   PetscErrorCode         ierr;
852 
853   /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
854      We need to filter the local indices that are represented through the DMDAGetElements decomposition
855      This is because the size of the local matrices in MATIS is the local size of the l2g map */
856   PetscFunctionBegin;
857   dof  = da->w;
858   dim  = dm->dim;
859 
860   ierr = MatSetBlockSize(J,dof);CHKERRQ(ierr);
861 
862   /* get local elements indices in local DMDA numbering */
863   ierr = DMDAGetElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
864   ierr = ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nel*nen,e_loc,PETSC_COPY_VALUES,&is_loc_filt);CHKERRQ(ierr);
865   ierr = DMDARestoreElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr);
866 
867   /* obtain a consistent local ordering for MATIS */
868   ierr = ISSortRemoveDups(is_loc_filt);CHKERRQ(ierr);
869   ierr = ISBlockGetLocalSize(is_loc_filt,&nb);CHKERRQ(ierr);
870   ierr = DMGetLocalToGlobalMapping(dm,&ltog);CHKERRQ(ierr);
871   ierr = ISLocalToGlobalMappingGetSize(ltog,&nv);CHKERRQ(ierr);
872   ierr = PetscMalloc1(PetscMax(nb,nv/dof),&gidx);CHKERRQ(ierr);
873   ierr = ISBlockGetIndices(is_loc_filt,&idx);CHKERRQ(ierr);
874   ierr = ISLocalToGlobalMappingApplyBlock(ltog,nb,idx,gidx);CHKERRQ(ierr);
875   ierr = ISBlockRestoreIndices(is_loc_filt,&idx);CHKERRQ(ierr);
876   ierr = ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nb,gidx,PETSC_USE_POINTER,&is_glob);CHKERRQ(ierr);
877   ierr = ISLocalToGlobalMappingCreateIS(is_glob,&ltog);CHKERRQ(ierr);
878   ierr = ISDestroy(&is_glob);CHKERRQ(ierr);
879   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
880   ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
881 
882   /* We also attach a l2g map to the local matrices to have MatSetValueLocal to work */
883   ierr = MatISGetLocalMat(J,&lJ);CHKERRQ(ierr);
884   ierr = ISLocalToGlobalMappingCreateIS(is_loc_filt,&ltog);CHKERRQ(ierr);
885   ierr = ISDestroy(&is_loc_filt);CHKERRQ(ierr);
886   ierr = ISCreateStride(PetscObjectComm((PetscObject)lJ),nv/dof,0,1,&is_glob);CHKERRQ(ierr);
887   ierr = ISGetIndices(is_glob,&idx);CHKERRQ(ierr);
888   ierr = ISGlobalToLocalMappingApplyBlock(ltog,IS_GTOLM_MASK,nv/dof,idx,&nb,gidx);CHKERRQ(ierr);
889   ierr = ISRestoreIndices(is_glob,&idx);CHKERRQ(ierr);
890   ierr = ISDestroy(&is_glob);CHKERRQ(ierr);
891   ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
892   ierr = ISCreateBlock(PETSC_COMM_SELF,dof,nb,gidx,PETSC_USE_POINTER,&is_loc_filt);CHKERRQ(ierr);
893   ierr = ISLocalToGlobalMappingCreateIS(is_loc_filt,&ltog);CHKERRQ(ierr);
894   ierr = ISDestroy(&is_loc_filt);CHKERRQ(ierr);
895   ierr = MatSetLocalToGlobalMapping(lJ,ltog,ltog);CHKERRQ(ierr);
896   ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
897   ierr = PetscFree(gidx);CHKERRQ(ierr);
898 
899   /* Preallocation (not exact): we reuse the preallocation routines of the assembled version  */
900   flg = dm->prealloc_only;
901   dm->prealloc_only = PETSC_TRUE;
902   switch (dim) {
903   case 1:
904     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr);
905     ierr = DMCreateMatrix_DA_1d_MPIAIJ(dm,J,PETSC_TRUE);CHKERRQ(ierr);
906     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
907     break;
908   case 2:
909     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr);
910     ierr = DMCreateMatrix_DA_2d_MPIAIJ(dm,J,PETSC_TRUE);CHKERRQ(ierr);
911     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
912     break;
913   case 3:
914     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr);
915     ierr = DMCreateMatrix_DA_3d_MPIAIJ(dm,J,PETSC_TRUE);CHKERRQ(ierr);
916     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
917     break;
918   default:
919     SETERRQ1(PetscObjectComm((PetscObject)dm),PETSC_ERR_SUP,"Unhandled dimension %d",dim);
920     break;
921   }
922   dm->prealloc_only = flg;
923   PetscFunctionReturn(0);
924 }
925 
926 PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da,Mat J)
927 {
928   PetscErrorCode         ierr;
929   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p;
930   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
931   MPI_Comm               comm;
932   PetscScalar            *values;
933   DMBoundaryType         bx,by;
934   ISLocalToGlobalMapping ltog;
935   DMDAStencilType        st;
936 
937   PetscFunctionBegin;
938   /*
939          nc - number of components per grid point
940          col - number of colors needed in one direction for single component problem
941 
942   */
943   ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
944   col  = 2*s + 1;
945   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
946   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
947   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
948 
949   ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr);
950   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
951 
952   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
953   /* determine the matrix preallocation information */
954   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
955   for (i=xs; i<xs+nx; i++) {
956 
957     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
958     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
959 
960     for (j=ys; j<ys+ny; j++) {
961       slot = i - gxs + gnx*(j - gys);
962 
963       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
964       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
965 
966       cnt = 0;
967       for (k=0; k<nc; k++) {
968         for (l=lstart; l<lend+1; l++) {
969           for (p=pstart; p<pend+1; p++) {
970             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
971               cols[cnt++] = k + nc*(slot + gnx*l + p);
972             }
973           }
974         }
975         rows[k] = k + nc*(slot);
976       }
977       ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
978     }
979   }
980   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
981   ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr);
982   ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
983   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
984 
985   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
986 
987   /*
988     For each node in the grid: we get the neighbors in the local (on processor ordering
989     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
990     PETSc ordering.
991   */
992   if (!da->prealloc_only) {
993     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
994     for (i=xs; i<xs+nx; i++) {
995 
996       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
997       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
998 
999       for (j=ys; j<ys+ny; j++) {
1000         slot = i - gxs + gnx*(j - gys);
1001 
1002         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1003         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1004 
1005         cnt = 0;
1006         for (k=0; k<nc; k++) {
1007           for (l=lstart; l<lend+1; l++) {
1008             for (p=pstart; p<pend+1; p++) {
1009               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1010                 cols[cnt++] = k + nc*(slot + gnx*l + p);
1011               }
1012             }
1013           }
1014           rows[k] = k + nc*(slot);
1015         }
1016         ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1017       }
1018     }
1019     ierr = PetscFree(values);CHKERRQ(ierr);
1020     /* do not copy values to GPU since they are all zero and not yet needed there */
1021     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1022     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1023     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1024     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1025     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1026   }
1027   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1028   PetscFunctionReturn(0);
1029 }
1030 
1031 PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da,Mat J)
1032 {
1033   PetscErrorCode         ierr;
1034   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1035   PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
1036   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
1037   MPI_Comm               comm;
1038   PetscScalar            *values;
1039   DMBoundaryType         bx,by,bz;
1040   ISLocalToGlobalMapping ltog;
1041   DMDAStencilType        st;
1042 
1043   PetscFunctionBegin;
1044   /*
1045          nc - number of components per grid point
1046          col - number of colors needed in one direction for single component problem
1047 
1048   */
1049   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1050   col  = 2*s + 1;
1051   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1052   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1053   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1054 
1055   ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr);
1056   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1057 
1058   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1059   /* determine the matrix preallocation information */
1060   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1061   for (i=xs; i<xs+nx; i++) {
1062     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1063     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1064     for (j=ys; j<ys+ny; j++) {
1065       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1066       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1067       for (k=zs; k<zs+nz; k++) {
1068         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1069         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1070 
1071         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1072 
1073         cnt = 0;
1074         for (l=0; l<nc; l++) {
1075           for (ii=istart; ii<iend+1; ii++) {
1076             for (jj=jstart; jj<jend+1; jj++) {
1077               for (kk=kstart; kk<kend+1; kk++) {
1078                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1079                   cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1080                 }
1081               }
1082             }
1083           }
1084           rows[l] = l + nc*(slot);
1085         }
1086         ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1087       }
1088     }
1089   }
1090   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1091   ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1092   ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1093   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1094   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1095 
1096   /*
1097     For each node in the grid: we get the neighbors in the local (on processor ordering
1098     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1099     PETSc ordering.
1100   */
1101   if (!da->prealloc_only) {
1102     ierr = PetscCalloc1(col*col*col*nc*nc*nc,&values);CHKERRQ(ierr);
1103     for (i=xs; i<xs+nx; i++) {
1104       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1105       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1106       for (j=ys; j<ys+ny; j++) {
1107         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1108         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1109         for (k=zs; k<zs+nz; k++) {
1110           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1111           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1112 
1113           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1114 
1115           cnt = 0;
1116           for (l=0; l<nc; l++) {
1117             for (ii=istart; ii<iend+1; ii++) {
1118               for (jj=jstart; jj<jend+1; jj++) {
1119                 for (kk=kstart; kk<kend+1; kk++) {
1120                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1121                     cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1122                   }
1123                 }
1124               }
1125             }
1126             rows[l] = l + nc*(slot);
1127           }
1128           ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1129         }
1130       }
1131     }
1132     ierr = PetscFree(values);CHKERRQ(ierr);
1133     /* do not copy values to GPU since they are all zero and not yet needed there */
1134     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1135     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1136     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1137     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1138     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1139   }
1140   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1141   PetscFunctionReturn(0);
1142 }
1143 
1144 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J,PetscBool isIS)
1145 {
1146   PetscErrorCode         ierr;
1147   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,M,N;
1148   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
1149   MPI_Comm               comm;
1150   DMBoundaryType         bx,by;
1151   ISLocalToGlobalMapping ltog,mltog;
1152   DMDAStencilType        st;
1153   PetscBool              removedups = PETSC_FALSE;
1154 
1155   PetscFunctionBegin;
1156   /*
1157          nc - number of components per grid point
1158          col - number of colors needed in one direction for single component problem
1159 
1160   */
1161   ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
1162   if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) {
1163     ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr);
1164   }
1165   col  = 2*s + 1;
1166   /*
1167        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1168        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1169   */
1170   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1171   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1172   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
1173   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
1174   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1175 
1176   ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr);
1177   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1178 
1179   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1180   /* determine the matrix preallocation information */
1181   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
1182   for (i=xs; i<xs+nx; i++) {
1183 
1184     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1185     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1186 
1187     for (j=ys; j<ys+ny; j++) {
1188       slot = i - gxs + gnx*(j - gys);
1189 
1190       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1191       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1192 
1193       cnt = 0;
1194       for (k=0; k<nc; k++) {
1195         for (l=lstart; l<lend+1; l++) {
1196           for (p=pstart; p<pend+1; p++) {
1197             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1198               cols[cnt++] = k + nc*(slot + gnx*l + p);
1199             }
1200           }
1201         }
1202         rows[k] = k + nc*(slot);
1203       }
1204       if (removedups) {
1205         ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1206       } else {
1207         ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1208       }
1209     }
1210   }
1211   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1212   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1213   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1214   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1215   ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
1216   if (!mltog) {
1217     ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1218   }
1219 
1220   /*
1221     For each node in the grid: we get the neighbors in the local (on processor ordering
1222     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1223     PETSc ordering.
1224   */
1225   if (!da->prealloc_only) {
1226     for (i=xs; i<xs+nx; i++) {
1227 
1228       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1229       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1230 
1231       for (j=ys; j<ys+ny; j++) {
1232         slot = i - gxs + gnx*(j - gys);
1233 
1234         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1235         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1236 
1237         cnt = 0;
1238         for (l=lstart; l<lend+1; l++) {
1239           for (p=pstart; p<pend+1; p++) {
1240             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1241               cols[cnt++] = nc*(slot + gnx*l + p);
1242               for (k=1; k<nc; k++) {
1243                 cols[cnt] = 1 + cols[cnt-1];cnt++;
1244               }
1245             }
1246           }
1247         }
1248         for (k=0; k<nc; k++) rows[k] = k + nc*(slot);
1249         ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1250       }
1251     }
1252     /* do not copy values to GPU since they are all zero and not yet needed there */
1253     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1254     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1255     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1256     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1257     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1258     if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) {
1259       ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
1260     }
1261   }
1262   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1263   PetscFunctionReturn(0);
1264 }
1265 
1266 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)
1267 {
1268   PetscErrorCode         ierr;
1269   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1270   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,maxcnt = 0,l,p,M,N;
1271   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
1272   DM_DA                  *dd = (DM_DA*)da->data;
1273   PetscInt               ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
1274   MPI_Comm               comm;
1275   DMBoundaryType         bx,by;
1276   ISLocalToGlobalMapping ltog;
1277   DMDAStencilType        st;
1278   PetscBool              removedups = PETSC_FALSE;
1279 
1280   PetscFunctionBegin;
1281   /*
1282          nc - number of components per grid point
1283          col - number of colors needed in one direction for single component problem
1284 
1285   */
1286   ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
1287   col  = 2*s + 1;
1288   /*
1289        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1290        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1291   */
1292   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1293   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1294   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
1295   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
1296   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1297 
1298   ierr = PetscMalloc1(col*col*nc,&cols);CHKERRQ(ierr);
1299   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1300 
1301   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1302   /* determine the matrix preallocation information */
1303   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
1304   for (i=xs; i<xs+nx; i++) {
1305 
1306     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1307     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1308 
1309     for (j=ys; j<ys+ny; j++) {
1310       slot = i - gxs + gnx*(j - gys);
1311 
1312       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1313       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1314 
1315       for (k=0; k<nc; k++) {
1316         cnt = 0;
1317         for (l=lstart; l<lend+1; l++) {
1318           for (p=pstart; p<pend+1; p++) {
1319             if (l || p) {
1320               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
1321                 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
1322               }
1323             } else {
1324               if (dfill) {
1325                 for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1326               } else {
1327                 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1328               }
1329             }
1330           }
1331         }
1332         row    = k + nc*(slot);
1333         maxcnt = PetscMax(maxcnt,cnt);
1334         if (removedups) {
1335           ierr   = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1336         } else {
1337           ierr   = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1338         }
1339       }
1340     }
1341   }
1342   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1343   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1344   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1345   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1346 
1347   /*
1348     For each node in the grid: we get the neighbors in the local (on processor ordering
1349     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1350     PETSc ordering.
1351   */
1352   if (!da->prealloc_only) {
1353     for (i=xs; i<xs+nx; i++) {
1354 
1355       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1356       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1357 
1358       for (j=ys; j<ys+ny; j++) {
1359         slot = i - gxs + gnx*(j - gys);
1360 
1361         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1362         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1363 
1364         for (k=0; k<nc; k++) {
1365           cnt = 0;
1366           for (l=lstart; l<lend+1; l++) {
1367             for (p=pstart; p<pend+1; p++) {
1368               if (l || p) {
1369                 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
1370                   for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
1371                 }
1372               } else {
1373                 if (dfill) {
1374                   for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1375                 } else {
1376                   for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1377                 }
1378               }
1379             }
1380           }
1381           row  = k + nc*(slot);
1382           ierr = MatSetValuesLocal(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1383         }
1384       }
1385     }
1386     /* do not copy values to GPU since they are all zero and not yet needed there */
1387     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1388     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1389     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1390     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1391     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1392   }
1393   ierr = PetscFree(cols);CHKERRQ(ierr);
1394   PetscFunctionReturn(0);
1395 }
1396 
1397 /* ---------------------------------------------------------------------------------*/
1398 
1399 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J,PetscBool isIS)
1400 {
1401   PetscErrorCode         ierr;
1402   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1403   PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
1404   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
1405   MPI_Comm               comm;
1406   DMBoundaryType         bx,by,bz;
1407   ISLocalToGlobalMapping ltog,mltog;
1408   DMDAStencilType        st;
1409   PetscBool              removedups = PETSC_FALSE;
1410 
1411   PetscFunctionBegin;
1412   /*
1413          nc - number of components per grid point
1414          col - number of colors needed in one direction for single component problem
1415 
1416   */
1417   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1418   if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) {
1419     ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr);
1420   }
1421   col  = 2*s + 1;
1422 
1423   /*
1424        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1425        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1426   */
1427   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1428   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1429   if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;
1430 
1431   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1432   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1433   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1434 
1435   ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr);
1436   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1437 
1438   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1439   /* determine the matrix preallocation information */
1440   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1441   for (i=xs; i<xs+nx; i++) {
1442     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1443     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1444     for (j=ys; j<ys+ny; j++) {
1445       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1446       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1447       for (k=zs; k<zs+nz; k++) {
1448         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1449         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1450 
1451         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1452 
1453         cnt = 0;
1454         for (l=0; l<nc; l++) {
1455           for (ii=istart; ii<iend+1; ii++) {
1456             for (jj=jstart; jj<jend+1; jj++) {
1457               for (kk=kstart; kk<kend+1; kk++) {
1458                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1459                   cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1460                 }
1461               }
1462             }
1463           }
1464           rows[l] = l + nc*(slot);
1465         }
1466         if (removedups) {
1467           ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1468         } else {
1469           ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1470         }
1471       }
1472     }
1473   }
1474   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1475   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1476   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1477   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1478   ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
1479   if (!mltog) {
1480     ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1481   }
1482 
1483   /*
1484     For each node in the grid: we get the neighbors in the local (on processor ordering
1485     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1486     PETSc ordering.
1487   */
1488   if (!da->prealloc_only) {
1489     for (i=xs; i<xs+nx; i++) {
1490       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1491       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1492       for (j=ys; j<ys+ny; j++) {
1493         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1494         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1495         for (k=zs; k<zs+nz; k++) {
1496           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1497           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1498 
1499           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1500 
1501           cnt = 0;
1502           for (kk=kstart; kk<kend+1; kk++) {
1503             for (jj=jstart; jj<jend+1; jj++) {
1504               for (ii=istart; ii<iend+1; ii++) {
1505                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1506                   cols[cnt++] = nc*(slot + ii + gnx*jj + gnx*gny*kk);
1507                     for (l=1; l<nc; l++) {
1508                       cols[cnt] = 1 + cols[cnt-1];cnt++;
1509                   }
1510                 }
1511               }
1512             }
1513           }
1514           rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
1515           ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1516         }
1517       }
1518     }
1519     /* do not copy values to GPU since they are all zero and not yet needed there */
1520     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1521     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1522     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1523     if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) {
1524       ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
1525     }
1526     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1527     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1528   }
1529   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1530   PetscFunctionReturn(0);
1531 }
1532 
1533 /* ---------------------------------------------------------------------------------*/
1534 
1535 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J)
1536 {
1537   PetscErrorCode         ierr;
1538   DM_DA                  *dd = (DM_DA*)da->data;
1539   PetscInt               xs,nx,i,j,gxs,gnx,row,k,l;
1540   PetscInt               m,dim,s,*cols = NULL,nc,cnt,maxcnt = 0,*ocols;
1541   PetscInt               *ofill = dd->ofill,*dfill = dd->dfill;
1542   DMBoundaryType         bx;
1543   ISLocalToGlobalMapping ltog;
1544   PetscMPIInt            rank,size;
1545 
1546   PetscFunctionBegin;
1547   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)da),&rank);CHKERRQ(ierr);
1548   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRQ(ierr);
1549 
1550   /*
1551          nc - number of components per grid point
1552 
1553   */
1554   ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr);
1555   if (s > 1) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Matrix creation for 1d not implemented correctly for stencil width larger than 1");
1556   ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr);
1557   ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr);
1558 
1559   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1560   ierr = PetscCalloc2(nx*nc,&cols,nx*nc,&ocols);CHKERRQ(ierr);
1561 
1562   /*
1563         note should be smaller for first and last process with no periodic
1564         does not handle dfill
1565   */
1566   cnt = 0;
1567   /* coupling with process to the left */
1568   for (i=0; i<s; i++) {
1569     for (j=0; j<nc; j++) {
1570       ocols[cnt] = ((!rank) ? 0 : (s - i)*(ofill[j+1] - ofill[j]));
1571       cols[cnt]  = dfill[j+1] - dfill[j] + (s + i)*(ofill[j+1] - ofill[j]);
1572       if (!rank && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1573         if (size > 1) ocols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
1574         else cols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
1575       }
1576       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1577       cnt++;
1578     }
1579   }
1580   for (i=s; i<nx-s; i++) {
1581     for (j=0; j<nc; j++) {
1582       cols[cnt] = dfill[j+1] - dfill[j] + 2*s*(ofill[j+1] - ofill[j]);
1583       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1584       cnt++;
1585     }
1586   }
1587   /* coupling with process to the right */
1588   for (i=nx-s; i<nx; i++) {
1589     for (j=0; j<nc; j++) {
1590       ocols[cnt] = ((rank == (size-1)) ? 0 : (i - nx + s + 1)*(ofill[j+1] - ofill[j]));
1591       cols[cnt]  = dfill[j+1] - dfill[j] + (s + nx - i - 1)*(ofill[j+1] - ofill[j]);
1592       if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1593         if (size > 1) ocols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
1594         else cols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
1595       }
1596       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1597       cnt++;
1598     }
1599   }
1600 
1601   ierr = MatSeqAIJSetPreallocation(J,0,cols);CHKERRQ(ierr);
1602   ierr = MatMPIAIJSetPreallocation(J,0,cols,0,ocols);CHKERRQ(ierr);
1603   ierr = PetscFree2(cols,ocols);CHKERRQ(ierr);
1604 
1605   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1606   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1607 
1608   /*
1609     For each node in the grid: we get the neighbors in the local (on processor ordering
1610     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1611     PETSc ordering.
1612   */
1613   if (!da->prealloc_only) {
1614     ierr = PetscMalloc1(maxcnt,&cols);CHKERRQ(ierr);
1615     row = xs*nc;
1616     /* coupling with process to the left */
1617     for (i=xs; i<xs+s; i++) {
1618       for (j=0; j<nc; j++) {
1619         cnt = 0;
1620         if (rank) {
1621           for (l=0; l<s; l++) {
1622             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1623           }
1624         }
1625         if (!rank && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1626           for (l=0; l<s; l++) {
1627             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (m + i - s - l)*nc + ofill[k];
1628           }
1629         }
1630         if (dfill) {
1631           for (k=dfill[j]; k<dfill[j+1]; k++) {
1632             cols[cnt++] = i*nc + dfill[k];
1633           }
1634         } else {
1635           for (k=0; k<nc; k++) {
1636             cols[cnt++] = i*nc + k;
1637           }
1638         }
1639         for (l=0; l<s; l++) {
1640           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1641         }
1642         ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1643         row++;
1644       }
1645     }
1646     for (i=xs+s; i<xs+nx-s; i++) {
1647       for (j=0; j<nc; j++) {
1648         cnt = 0;
1649         for (l=0; l<s; l++) {
1650           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1651         }
1652         if (dfill) {
1653           for (k=dfill[j]; k<dfill[j+1]; k++) {
1654             cols[cnt++] = i*nc + dfill[k];
1655           }
1656         } else {
1657           for (k=0; k<nc; k++) {
1658             cols[cnt++] = i*nc + k;
1659           }
1660         }
1661         for (l=0; l<s; l++) {
1662           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1663         }
1664         ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1665         row++;
1666       }
1667     }
1668     /* coupling with process to the right */
1669     for (i=xs+nx-s; i<xs+nx; i++) {
1670       for (j=0; j<nc; j++) {
1671         cnt = 0;
1672         for (l=0; l<s; l++) {
1673           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1674         }
1675         if (dfill) {
1676           for (k=dfill[j]; k<dfill[j+1]; k++) {
1677             cols[cnt++] = i*nc + dfill[k];
1678           }
1679         } else {
1680           for (k=0; k<nc; k++) {
1681             cols[cnt++] = i*nc + k;
1682           }
1683         }
1684         if (rank < size-1) {
1685           for (l=0; l<s; l++) {
1686             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1687           }
1688         }
1689         if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1690           for (l=0; l<s; l++) {
1691             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s - l - m + 2)*nc + ofill[k];
1692           }
1693         }
1694         ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1695         row++;
1696       }
1697     }
1698     ierr = PetscFree(cols);CHKERRQ(ierr);
1699     /* do not copy values to GPU since they are all zero and not yet needed there */
1700     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1701     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1702     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1703     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1704     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1705   }
1706   PetscFunctionReturn(0);
1707 }
1708 
1709 /* ---------------------------------------------------------------------------------*/
1710 
1711 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J,PetscBool isIS)
1712 {
1713   PetscErrorCode         ierr;
1714   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1715   PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
1716   PetscInt               istart,iend;
1717   DMBoundaryType         bx;
1718   ISLocalToGlobalMapping ltog,mltog;
1719 
1720   PetscFunctionBegin;
1721   /*
1722          nc - number of components per grid point
1723          col - number of colors needed in one direction for single component problem
1724 
1725   */
1726   ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr);
1727   if (!isIS && bx == DM_BOUNDARY_NONE) {
1728     ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr);
1729   }
1730   col  = 2*s + 1;
1731 
1732   ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr);
1733   ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr);
1734 
1735   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1736   ierr = MatSeqAIJSetPreallocation(J,col*nc,NULL);CHKERRQ(ierr);
1737   ierr = MatMPIAIJSetPreallocation(J,col*nc,NULL,col*nc,NULL);CHKERRQ(ierr);
1738 
1739   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1740   ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
1741   if (!mltog) {
1742     ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1743   }
1744 
1745   /*
1746     For each node in the grid: we get the neighbors in the local (on processor ordering
1747     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1748     PETSc ordering.
1749   */
1750   if (!da->prealloc_only) {
1751     ierr = PetscMalloc2(nc,&rows,col*nc*nc,&cols);CHKERRQ(ierr);
1752     for (i=xs; i<xs+nx; i++) {
1753       istart = PetscMax(-s,gxs - i);
1754       iend   = PetscMin(s,gxs + gnx - i - 1);
1755       slot   = i - gxs;
1756 
1757       cnt = 0;
1758       for (i1=istart; i1<iend+1; i1++) {
1759         cols[cnt++] = nc*(slot + i1);
1760         for (l=1; l<nc; l++) {
1761           cols[cnt] = 1 + cols[cnt-1];cnt++;
1762         }
1763       }
1764       rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
1765       ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1766     }
1767     /* do not copy values to GPU since they are all zero and not yet needed there */
1768     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1769     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1770     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1771     if (!isIS && bx == DM_BOUNDARY_NONE) {
1772       ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
1773     }
1774     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1775     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1776     ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1777   }
1778   PetscFunctionReturn(0);
1779 }
1780 
1781 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
1782 {
1783   PetscErrorCode         ierr;
1784   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1785   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1786   PetscInt               istart,iend,jstart,jend,ii,jj;
1787   MPI_Comm               comm;
1788   PetscScalar            *values;
1789   DMBoundaryType         bx,by;
1790   DMDAStencilType        st;
1791   ISLocalToGlobalMapping ltog;
1792 
1793   PetscFunctionBegin;
1794   /*
1795      nc - number of components per grid point
1796      col - number of colors needed in one direction for single component problem
1797   */
1798   ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
1799   col  = 2*s + 1;
1800 
1801   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
1802   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
1803   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1804 
1805   ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr);
1806 
1807   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1808 
1809   /* determine the matrix preallocation information */
1810   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1811   for (i=xs; i<xs+nx; i++) {
1812     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1813     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1814     for (j=ys; j<ys+ny; j++) {
1815       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1816       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1817       slot   = i - gxs + gnx*(j - gys);
1818 
1819       /* Find block columns in block row */
1820       cnt = 0;
1821       for (ii=istart; ii<iend+1; ii++) {
1822         for (jj=jstart; jj<jend+1; jj++) {
1823           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1824             cols[cnt++] = slot + ii + gnx*jj;
1825           }
1826         }
1827       }
1828       ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1829     }
1830   }
1831   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1832   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1833   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1834 
1835   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1836 
1837   /*
1838     For each node in the grid: we get the neighbors in the local (on processor ordering
1839     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1840     PETSc ordering.
1841   */
1842   if (!da->prealloc_only) {
1843     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
1844     for (i=xs; i<xs+nx; i++) {
1845       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1846       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1847       for (j=ys; j<ys+ny; j++) {
1848         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1849         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1850         slot = i - gxs + gnx*(j - gys);
1851         cnt  = 0;
1852         for (ii=istart; ii<iend+1; ii++) {
1853           for (jj=jstart; jj<jend+1; jj++) {
1854             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1855               cols[cnt++] = slot + ii + gnx*jj;
1856             }
1857           }
1858         }
1859         ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1860       }
1861     }
1862     ierr = PetscFree(values);CHKERRQ(ierr);
1863     /* do not copy values to GPU since they are all zero and not yet needed there */
1864     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1865     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1866     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1867     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1868     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1869   }
1870   ierr = PetscFree(cols);CHKERRQ(ierr);
1871   PetscFunctionReturn(0);
1872 }
1873 
1874 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
1875 {
1876   PetscErrorCode         ierr;
1877   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1878   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1879   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1880   MPI_Comm               comm;
1881   PetscScalar            *values;
1882   DMBoundaryType         bx,by,bz;
1883   DMDAStencilType        st;
1884   ISLocalToGlobalMapping ltog;
1885 
1886   PetscFunctionBegin;
1887   /*
1888          nc - number of components per grid point
1889          col - number of colors needed in one direction for single component problem
1890 
1891   */
1892   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1893   col  = 2*s + 1;
1894 
1895   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1896   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1897   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1898 
1899   ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr);
1900 
1901   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1902 
1903   /* determine the matrix preallocation information */
1904   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1905   for (i=xs; i<xs+nx; i++) {
1906     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1907     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1908     for (j=ys; j<ys+ny; j++) {
1909       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1910       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1911       for (k=zs; k<zs+nz; k++) {
1912         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1913         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1914 
1915         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1916 
1917         /* Find block columns in block row */
1918         cnt = 0;
1919         for (ii=istart; ii<iend+1; ii++) {
1920           for (jj=jstart; jj<jend+1; jj++) {
1921             for (kk=kstart; kk<kend+1; kk++) {
1922               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1923                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1924               }
1925             }
1926           }
1927         }
1928         ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1929       }
1930     }
1931   }
1932   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1933   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1934   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1935 
1936   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1937 
1938   /*
1939     For each node in the grid: we get the neighbors in the local (on processor ordering
1940     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1941     PETSc ordering.
1942   */
1943   if (!da->prealloc_only) {
1944     ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr);
1945     for (i=xs; i<xs+nx; i++) {
1946       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1947       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1948       for (j=ys; j<ys+ny; j++) {
1949         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1950         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1951         for (k=zs; k<zs+nz; k++) {
1952           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1953           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1954 
1955           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1956 
1957           cnt = 0;
1958           for (ii=istart; ii<iend+1; ii++) {
1959             for (jj=jstart; jj<jend+1; jj++) {
1960               for (kk=kstart; kk<kend+1; kk++) {
1961                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1962                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1963                 }
1964               }
1965             }
1966           }
1967           ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1968         }
1969       }
1970     }
1971     ierr = PetscFree(values);CHKERRQ(ierr);
1972     /* do not copy values to GPU since they are all zero and not yet needed there */
1973     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1974     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1975     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1976     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1977     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1978   }
1979   ierr = PetscFree(cols);CHKERRQ(ierr);
1980   PetscFunctionReturn(0);
1981 }
1982 
1983 /*
1984   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
1985   identify in the local ordering with periodic domain.
1986 */
1987 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[])
1988 {
1989   PetscErrorCode ierr;
1990   PetscInt       i,n;
1991 
1992   PetscFunctionBegin;
1993   ierr = ISLocalToGlobalMappingApplyBlock(ltog,1,row,row);CHKERRQ(ierr);
1994   ierr = ISLocalToGlobalMappingApplyBlock(ltog,*cnt,col,col);CHKERRQ(ierr);
1995   for (i=0,n=0; i<*cnt; i++) {
1996     if (col[i] >= *row) col[n++] = col[i];
1997   }
1998   *cnt = n;
1999   PetscFunctionReturn(0);
2000 }
2001 
2002 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J)
2003 {
2004   PetscErrorCode         ierr;
2005   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2006   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
2007   PetscInt               istart,iend,jstart,jend,ii,jj;
2008   MPI_Comm               comm;
2009   PetscScalar            *values;
2010   DMBoundaryType         bx,by;
2011   DMDAStencilType        st;
2012   ISLocalToGlobalMapping ltog;
2013 
2014   PetscFunctionBegin;
2015   /*
2016      nc - number of components per grid point
2017      col - number of colors needed in one direction for single component problem
2018   */
2019   ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
2020   col  = 2*s + 1;
2021 
2022   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
2023   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
2024   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
2025 
2026   ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr);
2027 
2028   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
2029 
2030   /* determine the matrix preallocation information */
2031   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
2032   for (i=xs; i<xs+nx; i++) {
2033     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2034     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2035     for (j=ys; j<ys+ny; j++) {
2036       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2037       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2038       slot   = i - gxs + gnx*(j - gys);
2039 
2040       /* Find block columns in block row */
2041       cnt = 0;
2042       for (ii=istart; ii<iend+1; ii++) {
2043         for (jj=jstart; jj<jend+1; jj++) {
2044           if (st == DMDA_STENCIL_BOX || !ii || !jj) {
2045             cols[cnt++] = slot + ii + gnx*jj;
2046           }
2047         }
2048       }
2049       ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2050       ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
2051     }
2052   }
2053   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
2054   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
2055   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2056 
2057   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
2058 
2059   /*
2060     For each node in the grid: we get the neighbors in the local (on processor ordering
2061     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2062     PETSc ordering.
2063   */
2064   if (!da->prealloc_only) {
2065     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
2066     for (i=xs; i<xs+nx; i++) {
2067       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2068       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2069       for (j=ys; j<ys+ny; j++) {
2070         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2071         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2072         slot   = i - gxs + gnx*(j - gys);
2073 
2074         /* Find block columns in block row */
2075         cnt = 0;
2076         for (ii=istart; ii<iend+1; ii++) {
2077           for (jj=jstart; jj<jend+1; jj++) {
2078             if (st == DMDA_STENCIL_BOX || !ii || !jj) {
2079               cols[cnt++] = slot + ii + gnx*jj;
2080             }
2081           }
2082         }
2083         ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2084         ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2085       }
2086     }
2087     ierr = PetscFree(values);CHKERRQ(ierr);
2088     /* do not copy values to GPU since they are all zero and not yet needed there */
2089     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
2090     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2091     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2092     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
2093     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2094   }
2095   ierr = PetscFree(cols);CHKERRQ(ierr);
2096   PetscFunctionReturn(0);
2097 }
2098 
2099 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J)
2100 {
2101   PetscErrorCode         ierr;
2102   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2103   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
2104   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
2105   MPI_Comm               comm;
2106   PetscScalar            *values;
2107   DMBoundaryType         bx,by,bz;
2108   DMDAStencilType        st;
2109   ISLocalToGlobalMapping ltog;
2110 
2111   PetscFunctionBegin;
2112   /*
2113      nc - number of components per grid point
2114      col - number of colors needed in one direction for single component problem
2115   */
2116   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
2117   col  = 2*s + 1;
2118 
2119   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
2120   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
2121   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
2122 
2123   /* create the matrix */
2124   ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr);
2125 
2126   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
2127 
2128   /* determine the matrix preallocation information */
2129   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
2130   for (i=xs; i<xs+nx; i++) {
2131     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2132     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2133     for (j=ys; j<ys+ny; j++) {
2134       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2135       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2136       for (k=zs; k<zs+nz; k++) {
2137         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2138         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2139 
2140         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2141 
2142         /* Find block columns in block row */
2143         cnt = 0;
2144         for (ii=istart; ii<iend+1; ii++) {
2145           for (jj=jstart; jj<jend+1; jj++) {
2146             for (kk=kstart; kk<kend+1; kk++) {
2147               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
2148                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
2149               }
2150             }
2151           }
2152         }
2153         ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2154         ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
2155       }
2156     }
2157   }
2158   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
2159   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
2160   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2161 
2162   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
2163 
2164   /*
2165     For each node in the grid: we get the neighbors in the local (on processor ordering
2166     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2167     PETSc ordering.
2168   */
2169   if (!da->prealloc_only) {
2170     ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr);
2171     for (i=xs; i<xs+nx; i++) {
2172       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2173       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2174       for (j=ys; j<ys+ny; j++) {
2175         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2176         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2177         for (k=zs; k<zs+nz; k++) {
2178           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2179           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2180 
2181           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2182 
2183           cnt = 0;
2184           for (ii=istart; ii<iend+1; ii++) {
2185             for (jj=jstart; jj<jend+1; jj++) {
2186               for (kk=kstart; kk<kend+1; kk++) {
2187                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
2188                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
2189                 }
2190               }
2191             }
2192           }
2193           ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2194           ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2195         }
2196       }
2197     }
2198     ierr = PetscFree(values);CHKERRQ(ierr);
2199     /* do not copy values to GPU since they are all zero and not yet needed there */
2200     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
2201     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2202     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2203     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
2204     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2205   }
2206   ierr = PetscFree(cols);CHKERRQ(ierr);
2207   PetscFunctionReturn(0);
2208 }
2209 
2210 /* ---------------------------------------------------------------------------------*/
2211 
2212 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J)
2213 {
2214   PetscErrorCode         ierr;
2215   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2216   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt, maxcnt = 0,l,p,*dnz,*onz;
2217   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
2218   DM_DA                  *dd = (DM_DA*)da->data;
2219   PetscInt               ifill_col,*dfill = dd->dfill,*ofill = dd->ofill;
2220   MPI_Comm               comm;
2221   PetscScalar            *values;
2222   DMBoundaryType         bx,by,bz;
2223   ISLocalToGlobalMapping ltog;
2224   DMDAStencilType        st;
2225   PetscBool              removedups = PETSC_FALSE;
2226 
2227   PetscFunctionBegin;
2228   /*
2229          nc - number of components per grid point
2230          col - number of colors needed in one direction for single component problem
2231 
2232   */
2233   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
2234   col  = 2*s + 1;
2235   if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
2236                  by 2*stencil_width + 1\n");
2237   if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
2238                  by 2*stencil_width + 1\n");
2239   if (bz == DM_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
2240                  by 2*stencil_width + 1\n");
2241 
2242   /*
2243        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
2244        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
2245   */
2246   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
2247   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
2248   if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;
2249 
2250   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
2251   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
2252   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
2253 
2254   ierr = PetscMalloc1(col*col*col*nc,&cols);CHKERRQ(ierr);
2255   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
2256 
2257   /* determine the matrix preallocation information */
2258   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
2259 
2260   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
2261   for (i=xs; i<xs+nx; i++) {
2262     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2263     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2264     for (j=ys; j<ys+ny; j++) {
2265       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2266       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2267       for (k=zs; k<zs+nz; k++) {
2268         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2269         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2270 
2271         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2272 
2273         for (l=0; l<nc; l++) {
2274           cnt = 0;
2275           for (ii=istart; ii<iend+1; ii++) {
2276             for (jj=jstart; jj<jend+1; jj++) {
2277               for (kk=kstart; kk<kend+1; kk++) {
2278                 if (ii || jj || kk) {
2279                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2280                     for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2281                   }
2282                 } else {
2283                   if (dfill) {
2284                     for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2285                   } else {
2286                     for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2287                   }
2288                 }
2289               }
2290             }
2291           }
2292           row  = l + nc*(slot);
2293           maxcnt = PetscMax(maxcnt,cnt);
2294           if (removedups) {
2295             ierr = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
2296           } else {
2297             ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
2298           }
2299         }
2300       }
2301     }
2302   }
2303   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
2304   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
2305   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2306   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
2307 
2308   /*
2309     For each node in the grid: we get the neighbors in the local (on processor ordering
2310     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2311     PETSc ordering.
2312   */
2313   if (!da->prealloc_only) {
2314     ierr = PetscCalloc1(maxcnt,&values);CHKERRQ(ierr);
2315     for (i=xs; i<xs+nx; i++) {
2316       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2317       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2318       for (j=ys; j<ys+ny; j++) {
2319         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2320         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2321         for (k=zs; k<zs+nz; k++) {
2322           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2323           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2324 
2325           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2326 
2327           for (l=0; l<nc; l++) {
2328             cnt = 0;
2329             for (ii=istart; ii<iend+1; ii++) {
2330               for (jj=jstart; jj<jend+1; jj++) {
2331                 for (kk=kstart; kk<kend+1; kk++) {
2332                   if (ii || jj || kk) {
2333                     if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2334                       for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2335                     }
2336                   } else {
2337                     if (dfill) {
2338                       for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2339                     } else {
2340                       for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2341                     }
2342                   }
2343                 }
2344               }
2345             }
2346             row  = l + nc*(slot);
2347             ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2348           }
2349         }
2350       }
2351     }
2352     ierr = PetscFree(values);CHKERRQ(ierr);
2353     /* do not copy values to GPU since they are all zero and not yet needed there */
2354     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
2355     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2356     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2357     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
2358     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2359   }
2360   ierr = PetscFree(cols);CHKERRQ(ierr);
2361   PetscFunctionReturn(0);
2362 }
2363