xref: /petsc/src/dm/impls/da/fdda.c (revision b41ce5d507ea9a58bfa83cf403107a702e77a67d)
1 
2 #include <petsc/private/dmdaimpl.h> /*I      "petscdmda.h"     I*/
3 #include <petscmat.h>
4 
5 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring*);
6 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring*);
7 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring*);
8 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring*);
9 
10 /*
11    For ghost i that may be negative or greater than the upper bound this
12   maps it into the 0:m-1 range using periodicity
13 */
14 #define SetInRange(i,m) ((i < 0) ? m+i : ((i >= m) ? i-m : i))
15 
16 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill,PetscInt w,PetscInt **rfill)
17 {
18   PetscErrorCode ierr;
19   PetscInt       i,j,nz,*fill;
20 
21   PetscFunctionBegin;
22   if (!dfill) PetscFunctionReturn(0);
23 
24   /* count number nonzeros */
25   nz = 0;
26   for (i=0; i<w; i++) {
27     for (j=0; j<w; j++) {
28       if (dfill[w*i+j]) nz++;
29     }
30   }
31   ierr = PetscMalloc1(nz + w + 1,&fill);CHKERRQ(ierr);
32   /* construct modified CSR storage of nonzero structure */
33   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
34    so fill[1] - fill[0] gives number of nonzeros in first row etc */
35   nz = w + 1;
36   for (i=0; i<w; i++) {
37     fill[i] = nz;
38     for (j=0; j<w; j++) {
39       if (dfill[w*i+j]) {
40         fill[nz] = j;
41         nz++;
42       }
43     }
44   }
45   fill[w] = nz;
46 
47   *rfill = fill;
48   PetscFunctionReturn(0);
49 }
50 
51 /*@
52     DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
53     of the matrix returned by DMCreateMatrix().
54 
55     Logically Collective on DMDA
56 
57     Input Parameter:
58 +   da - the distributed array
59 .   dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
60 -   ofill - the fill pattern in the off-diagonal blocks
61 
62 
63     Level: developer
64 
65     Notes: This only makes sense when you are doing multicomponent problems but using the
66        MPIAIJ matrix format
67 
68            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
69        representing coupling and 0 entries for missing coupling. For example
70 $             dfill[9] = {1, 0, 0,
71 $                         1, 1, 0,
72 $                         0, 1, 1}
73        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
74        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
75        diagonal block).
76 
77      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
78      can be represented in the dfill, ofill format
79 
80    Contributed by Glenn Hammond
81 
82 .seealso DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()
83 
84 @*/
85 PetscErrorCode  DMDASetBlockFills(DM da,const PetscInt *dfill,const PetscInt *ofill)
86 {
87   DM_DA          *dd = (DM_DA*)da->data;
88   PetscErrorCode ierr;
89   PetscInt       i,k,cnt = 1;
90 
91   PetscFunctionBegin;
92   ierr = DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);CHKERRQ(ierr);
93   ierr = DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);CHKERRQ(ierr);
94 
95   /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of
96    columns to the left with any nonzeros in them plus 1 */
97   ierr = PetscCalloc1(dd->w,&dd->ofillcols);CHKERRQ(ierr);
98   for (i=0; i<dd->w; i++) {
99     for (k=dd->ofill[i]; k<dd->ofill[i+1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
100   }
101   for (i=0; i<dd->w; i++) {
102     if (dd->ofillcols[i]) {
103       dd->ofillcols[i] = cnt++;
104     }
105   }
106   PetscFunctionReturn(0);
107 }
108 
109 
110 PetscErrorCode  DMCreateColoring_DA(DM da,ISColoringType ctype,ISColoring *coloring)
111 {
112   PetscErrorCode   ierr;
113   PetscInt         dim,m,n,p,nc;
114   DMBoundaryType bx,by,bz;
115   MPI_Comm         comm;
116   PetscMPIInt      size;
117   PetscBool        isBAIJ;
118   DM_DA            *dd = (DM_DA*)da->data;
119 
120   PetscFunctionBegin;
121   /*
122                                   m
123           ------------------------------------------------------
124          |                                                     |
125          |                                                     |
126          |               ----------------------                |
127          |               |                    |                |
128       n  |           yn  |                    |                |
129          |               |                    |                |
130          |               .---------------------                |
131          |             (xs,ys)     xn                          |
132          |            .                                        |
133          |         (gxs,gys)                                   |
134          |                                                     |
135           -----------------------------------------------------
136   */
137 
138   /*
139          nc - number of components per grid point
140          col - number of colors needed in one direction for single component problem
141 
142   */
143   ierr = DMDAGetInfo(da,&dim,0,0,0,&m,&n,&p,&nc,0,&bx,&by,&bz,0);CHKERRQ(ierr);
144 
145   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
146   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
147   if (ctype == IS_COLORING_LOCAL) {
148     if (size == 1) {
149       ctype = IS_COLORING_GLOBAL;
150     } else if (dim > 1) {
151       if ((m==1 && bx == DM_BOUNDARY_PERIODIC) || (n==1 && by == DM_BOUNDARY_PERIODIC) || (p==1 && bz == DM_BOUNDARY_PERIODIC)) {
152         SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain  on the same process");
153       }
154     }
155   }
156 
157   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
158      matrices is for the blocks, not the individual matrix elements  */
159   ierr = PetscStrcmp(da->mattype,MATBAIJ,&isBAIJ);CHKERRQ(ierr);
160   if (!isBAIJ) {ierr = PetscStrcmp(da->mattype,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);}
161   if (!isBAIJ) {ierr = PetscStrcmp(da->mattype,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);}
162   if (isBAIJ) {
163     dd->w  = 1;
164     dd->xs = dd->xs/nc;
165     dd->xe = dd->xe/nc;
166     dd->Xs = dd->Xs/nc;
167     dd->Xe = dd->Xe/nc;
168   }
169 
170   /*
171      We do not provide a getcoloring function in the DMDA operations because
172    the basic DMDA does not know about matrices. We think of DMDA as being more
173    more low-level then matrices.
174   */
175   if (dim == 1) {
176     ierr = DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
177   } else if (dim == 2) {
178     ierr =  DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
179   } else if (dim == 3) {
180     ierr =  DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
181   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
182   if (isBAIJ) {
183     dd->w  = nc;
184     dd->xs = dd->xs*nc;
185     dd->xe = dd->xe*nc;
186     dd->Xs = dd->Xs*nc;
187     dd->Xe = dd->Xe*nc;
188   }
189   PetscFunctionReturn(0);
190 }
191 
192 /* ---------------------------------------------------------------------------------*/
193 
194 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
195 {
196   PetscErrorCode   ierr;
197   PetscInt         xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
198   PetscInt         ncolors;
199   MPI_Comm         comm;
200   DMBoundaryType bx,by;
201   DMDAStencilType  st;
202   ISColoringValue  *colors;
203   DM_DA            *dd = (DM_DA*)da->data;
204 
205   PetscFunctionBegin;
206   /*
207          nc - number of components per grid point
208          col - number of colors needed in one direction for single component problem
209 
210   */
211   ierr = DMDAGetInfo(da,&dim,&m,&n,0,&M,&N,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
212   col  = 2*s + 1;
213   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
214   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
215   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
216 
217   /* special case as taught to us by Paul Hovland */
218   if (st == DMDA_STENCIL_STAR && s == 1) {
219     ierr = DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
220   } else {
221 
222     if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X (%d) is divisible\n\
223                                                             by 2*stencil_width + 1 (%d)\n", m, col);
224     if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y (%d) is divisible\n\
225                                                             by 2*stencil_width + 1 (%d)\n", n, col);
226     if (ctype == IS_COLORING_GLOBAL) {
227       if (!dd->localcoloring) {
228         ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr);
229         ii   = 0;
230         for (j=ys; j<ys+ny; j++) {
231           for (i=xs; i<xs+nx; i++) {
232             for (k=0; k<nc; k++) {
233               colors[ii++] = k + nc*((i % col) + col*(j % col));
234             }
235           }
236         }
237         ncolors = nc + nc*(col-1 + col*(col-1));
238         ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
239       }
240       *coloring = dd->localcoloring;
241     } else if (ctype == IS_COLORING_LOCAL) {
242       if (!dd->ghostedcoloring) {
243         ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr);
244         ii   = 0;
245         for (j=gys; j<gys+gny; j++) {
246           for (i=gxs; i<gxs+gnx; i++) {
247             for (k=0; k<nc; k++) {
248               /* the complicated stuff is to handle periodic boundaries */
249               colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
250             }
251           }
252         }
253         ncolors = nc + nc*(col - 1 + col*(col-1));
254         ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
255         /* PetscIntView(ncolors,(PetscInt*)colors,0); */
256 
257         ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
258       }
259       *coloring = dd->ghostedcoloring;
260     } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
261   }
262   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
263   PetscFunctionReturn(0);
264 }
265 
266 /* ---------------------------------------------------------------------------------*/
267 
268 PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
269 {
270   PetscErrorCode   ierr;
271   PetscInt         xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
272   PetscInt         ncolors;
273   MPI_Comm         comm;
274   DMBoundaryType bx,by,bz;
275   DMDAStencilType  st;
276   ISColoringValue  *colors;
277   DM_DA            *dd = (DM_DA*)da->data;
278 
279   PetscFunctionBegin;
280   /*
281          nc - number of components per grid point
282          col - number of colors needed in one direction for single component problem
283 
284   */
285   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
286   col  = 2*s + 1;
287   if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
288                                                          by 2*stencil_width + 1\n");
289   if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
290                                                          by 2*stencil_width + 1\n");
291   if (bz == DM_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
292                                                          by 2*stencil_width + 1\n");
293 
294   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
295   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
296   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
297 
298   /* create the coloring */
299   if (ctype == IS_COLORING_GLOBAL) {
300     if (!dd->localcoloring) {
301       ierr = PetscMalloc1(nc*nx*ny*nz,&colors);CHKERRQ(ierr);
302       ii   = 0;
303       for (k=zs; k<zs+nz; k++) {
304         for (j=ys; j<ys+ny; j++) {
305           for (i=xs; i<xs+nx; i++) {
306             for (l=0; l<nc; l++) {
307               colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
308             }
309           }
310         }
311       }
312       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
313       ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
314     }
315     *coloring = dd->localcoloring;
316   } else if (ctype == IS_COLORING_LOCAL) {
317     if (!dd->ghostedcoloring) {
318       ierr = PetscMalloc1(nc*gnx*gny*gnz,&colors);CHKERRQ(ierr);
319       ii   = 0;
320       for (k=gzs; k<gzs+gnz; k++) {
321         for (j=gys; j<gys+gny; j++) {
322           for (i=gxs; i<gxs+gnx; i++) {
323             for (l=0; l<nc; l++) {
324               /* the complicated stuff is to handle periodic boundaries */
325               colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
326             }
327           }
328         }
329       }
330       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
331       ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
332       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
333     }
334     *coloring = dd->ghostedcoloring;
335   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
336   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
337   PetscFunctionReturn(0);
338 }
339 
340 /* ---------------------------------------------------------------------------------*/
341 
342 PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
343 {
344   PetscErrorCode   ierr;
345   PetscInt         xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
346   PetscInt         ncolors;
347   MPI_Comm         comm;
348   DMBoundaryType bx;
349   ISColoringValue  *colors;
350   DM_DA            *dd = (DM_DA*)da->data;
351 
352   PetscFunctionBegin;
353   /*
354          nc - number of components per grid point
355          col - number of colors needed in one direction for single component problem
356 
357   */
358   ierr = DMDAGetInfo(da,&dim,&m,0,0,&M,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
359   col  = 2*s + 1;
360 
361   if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points %d is divisible\n\
362                                                           by 2*stencil_width + 1 %d\n",(int)m,(int)col);
363 
364   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
365   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
366   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
367 
368   /* create the coloring */
369   if (ctype == IS_COLORING_GLOBAL) {
370     if (!dd->localcoloring) {
371       ierr = PetscMalloc1(nc*nx,&colors);CHKERRQ(ierr);
372       if (dd->ofillcols) {
373         PetscInt tc = 0;
374         for (i=0; i<nc; i++) tc += (PetscInt) (dd->ofillcols[i] > 0);
375         i1 = 0;
376         for (i=xs; i<xs+nx; i++) {
377           for (l=0; l<nc; l++) {
378             if (dd->ofillcols[l] && (i % col)) {
379               colors[i1++] =  nc - 1 + tc*((i % col) - 1) + dd->ofillcols[l];
380             } else {
381               colors[i1++] = l;
382             }
383           }
384         }
385         ncolors = nc + 2*s*tc;
386       } else {
387         i1 = 0;
388         for (i=xs; i<xs+nx; i++) {
389           for (l=0; l<nc; l++) {
390             colors[i1++] = l + nc*(i % col);
391           }
392         }
393         ncolors = nc + nc*(col-1);
394       }
395       ierr = ISColoringCreate(comm,ncolors,nc*nx,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
396     }
397     *coloring = dd->localcoloring;
398   } else if (ctype == IS_COLORING_LOCAL) {
399     if (!dd->ghostedcoloring) {
400       ierr = PetscMalloc1(nc*gnx,&colors);CHKERRQ(ierr);
401       i1   = 0;
402       for (i=gxs; i<gxs+gnx; i++) {
403         for (l=0; l<nc; l++) {
404           /* the complicated stuff is to handle periodic boundaries */
405           colors[i1++] = l + nc*(SetInRange(i,m) % col);
406         }
407       }
408       ncolors = nc + nc*(col-1);
409       ierr    = ISColoringCreate(comm,ncolors,nc*gnx,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
410       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
411     }
412     *coloring = dd->ghostedcoloring;
413   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
414   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
415   PetscFunctionReturn(0);
416 }
417 
418 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
419 {
420   PetscErrorCode   ierr;
421   PetscInt         xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
422   PetscInt         ncolors;
423   MPI_Comm         comm;
424   DMBoundaryType bx,by;
425   ISColoringValue  *colors;
426   DM_DA            *dd = (DM_DA*)da->data;
427 
428   PetscFunctionBegin;
429   /*
430          nc - number of components per grid point
431          col - number of colors needed in one direction for single component problem
432 
433   */
434   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,0);CHKERRQ(ierr);
435   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
436   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
437   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
438 
439   if (bx == DM_BOUNDARY_PERIODIC && (m % 5)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible by 5\n");
440   if (by == DM_BOUNDARY_PERIODIC && (n % 5)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible by 5\n");
441 
442   /* create the coloring */
443   if (ctype == IS_COLORING_GLOBAL) {
444     if (!dd->localcoloring) {
445       ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr);
446       ii   = 0;
447       for (j=ys; j<ys+ny; j++) {
448         for (i=xs; i<xs+nx; i++) {
449           for (k=0; k<nc; k++) {
450             colors[ii++] = k + nc*((3*j+i) % 5);
451           }
452         }
453       }
454       ncolors = 5*nc;
455       ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
456     }
457     *coloring = dd->localcoloring;
458   } else if (ctype == IS_COLORING_LOCAL) {
459     if (!dd->ghostedcoloring) {
460       ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr);
461       ii = 0;
462       for (j=gys; j<gys+gny; j++) {
463         for (i=gxs; i<gxs+gnx; i++) {
464           for (k=0; k<nc; k++) {
465             colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
466           }
467         }
468       }
469       ncolors = 5*nc;
470       ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
471       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
472     }
473     *coloring = dd->ghostedcoloring;
474   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
475   PetscFunctionReturn(0);
476 }
477 
478 /* =========================================================================== */
479 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat);
480 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM,Mat);
481 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat);
482 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat);
483 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat);
484 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat);
485 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat);
486 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat);
487 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat);
488 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat);
489 
490 /*@C
491    MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix
492 
493    Logically Collective on Mat
494 
495    Input Parameters:
496 +  mat - the matrix
497 -  da - the da
498 
499    Level: intermediate
500 
501 @*/
502 PetscErrorCode MatSetupDM(Mat mat,DM da)
503 {
504   PetscErrorCode ierr;
505 
506   PetscFunctionBegin;
507   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
508   PetscValidHeaderSpecific(da,DM_CLASSID,1);
509   ierr = PetscTryMethod(mat,"MatSetupDM_C",(Mat,DM),(mat,da));CHKERRQ(ierr);
510   PetscFunctionReturn(0);
511 }
512 
513 PetscErrorCode  MatView_MPI_DA(Mat A,PetscViewer viewer)
514 {
515   DM                da;
516   PetscErrorCode    ierr;
517   const char        *prefix;
518   Mat               Anatural;
519   AO                ao;
520   PetscInt          rstart,rend,*petsc,i;
521   IS                is;
522   MPI_Comm          comm;
523   PetscViewerFormat format;
524 
525   PetscFunctionBegin;
526   /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
527   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
528   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0);
529 
530   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
531   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
532   if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
533 
534   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
535   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
536   ierr = PetscMalloc1(rend-rstart,&petsc);CHKERRQ(ierr);
537   for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
538   ierr = AOApplicationToPetsc(ao,rend-rstart,petsc);CHKERRQ(ierr);
539   ierr = ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
540 
541   /* call viewer on natural ordering */
542   ierr = MatCreateSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);CHKERRQ(ierr);
543   ierr = ISDestroy(&is);CHKERRQ(ierr);
544   ierr = PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);CHKERRQ(ierr);
545   ierr = PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);CHKERRQ(ierr);
546   ierr = PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);CHKERRQ(ierr);
547   ierr = MatView(Anatural,viewer);CHKERRQ(ierr);
548   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
549   PetscFunctionReturn(0);
550 }
551 
552 PetscErrorCode  MatLoad_MPI_DA(Mat A,PetscViewer viewer)
553 {
554   DM             da;
555   PetscErrorCode ierr;
556   Mat            Anatural,Aapp;
557   AO             ao;
558   PetscInt       rstart,rend,*app,i,m,n,M,N;
559   IS             is;
560   MPI_Comm       comm;
561 
562   PetscFunctionBegin;
563   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
564   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
565   if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
566 
567   /* Load the matrix in natural ordering */
568   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Anatural);CHKERRQ(ierr);
569   ierr = MatSetType(Anatural,((PetscObject)A)->type_name);CHKERRQ(ierr);
570   ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr);
571   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
572   ierr = MatSetSizes(Anatural,m,n,M,N);CHKERRQ(ierr);
573   ierr = MatLoad(Anatural,viewer);CHKERRQ(ierr);
574 
575   /* Map natural ordering to application ordering and create IS */
576   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
577   ierr = MatGetOwnershipRange(Anatural,&rstart,&rend);CHKERRQ(ierr);
578   ierr = PetscMalloc1(rend-rstart,&app);CHKERRQ(ierr);
579   for (i=rstart; i<rend; i++) app[i-rstart] = i;
580   ierr = AOPetscToApplication(ao,rend-rstart,app);CHKERRQ(ierr);
581   ierr = ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
582 
583   /* Do permutation and replace header */
584   ierr = MatCreateSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);CHKERRQ(ierr);
585   ierr = MatHeaderReplace(A,&Aapp);CHKERRQ(ierr);
586   ierr = ISDestroy(&is);CHKERRQ(ierr);
587   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
588   PetscFunctionReturn(0);
589 }
590 
591 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
592 {
593   PetscErrorCode ierr;
594   PetscInt       dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
595   Mat            A;
596   MPI_Comm       comm;
597   MatType        Atype;
598   PetscSection   section, sectionGlobal;
599   void           (*aij)(void)=NULL,(*baij)(void)=NULL,(*sbaij)(void)=NULL;
600   MatType        mtype;
601   PetscMPIInt    size;
602   DM_DA          *dd = (DM_DA*)da->data;
603 
604   PetscFunctionBegin;
605   ierr = MatInitializePackage();CHKERRQ(ierr);
606   mtype = da->mattype;
607 
608   ierr = DMGetDefaultSection(da, &section);CHKERRQ(ierr);
609   if (section) {
610     PetscInt  bs = -1;
611     PetscInt  localSize;
612     PetscBool isShell, isBlock, isSeqBlock, isMPIBlock, isSymBlock, isSymSeqBlock, isSymMPIBlock, isSymmetric;
613 
614     ierr = DMGetDefaultGlobalSection(da, &sectionGlobal);CHKERRQ(ierr);
615     ierr = PetscSectionGetConstrainedStorageSize(sectionGlobal, &localSize);CHKERRQ(ierr);
616     ierr = MatCreate(PetscObjectComm((PetscObject)da),&A);CHKERRQ(ierr);
617     ierr = MatSetSizes(A,localSize,localSize,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
618     ierr = MatSetType(A,mtype);CHKERRQ(ierr);
619     ierr = PetscStrcmp(mtype,MATSHELL,&isShell);CHKERRQ(ierr);
620     ierr = PetscStrcmp(mtype,MATBAIJ,&isBlock);CHKERRQ(ierr);
621     ierr = PetscStrcmp(mtype,MATSEQBAIJ,&isSeqBlock);CHKERRQ(ierr);
622     ierr = PetscStrcmp(mtype,MATMPIBAIJ,&isMPIBlock);CHKERRQ(ierr);
623     ierr = PetscStrcmp(mtype,MATSBAIJ,&isSymBlock);CHKERRQ(ierr);
624     ierr = PetscStrcmp(mtype,MATSEQSBAIJ,&isSymSeqBlock);CHKERRQ(ierr);
625     ierr = PetscStrcmp(mtype,MATMPISBAIJ,&isSymMPIBlock);CHKERRQ(ierr);
626     /* Check for symmetric storage */
627     isSymmetric = (PetscBool) (isSymBlock || isSymSeqBlock || isSymMPIBlock);
628     if (isSymmetric) {
629       ierr = MatSetOption(*J, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE);CHKERRQ(ierr);
630     }
631     if (!isShell) {
632       PetscInt *dnz, *onz, *dnzu, *onzu, bsLocal;
633 
634       if (bs < 0) {
635         if (isBlock || isSeqBlock || isMPIBlock || isSymBlock || isSymSeqBlock || isSymMPIBlock) {
636           PetscInt pStart, pEnd, p, dof;
637 
638           ierr = PetscSectionGetChart(sectionGlobal, &pStart, &pEnd);CHKERRQ(ierr);
639           for (p = pStart; p < pEnd; ++p) {
640             ierr = PetscSectionGetDof(sectionGlobal, p, &dof);CHKERRQ(ierr);
641             if (dof) {
642               bs = dof;
643               break;
644             }
645           }
646         } else {
647           bs = 1;
648         }
649         /* Must have same blocksize on all procs (some might have no points) */
650         bsLocal = bs;
651         ierr    = MPIU_Allreduce(&bsLocal, &bs, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)da));CHKERRQ(ierr);
652       }
653       ierr = PetscCalloc4(localSize/bs, &dnz, localSize/bs, &onz, localSize/bs, &dnzu, localSize/bs, &onzu);CHKERRQ(ierr);
654       /* ierr = DMPlexPreallocateOperator(dm, bs, section, sectionGlobal, dnz, onz, dnzu, onzu, *J, fillMatrix);CHKERRQ(ierr); */
655       ierr = PetscFree4(dnz, onz, dnzu, onzu);CHKERRQ(ierr);
656     }
657   }
658   /*
659                                   m
660           ------------------------------------------------------
661          |                                                     |
662          |                                                     |
663          |               ----------------------                |
664          |               |                    |                |
665       n  |           ny  |                    |                |
666          |               |                    |                |
667          |               .---------------------                |
668          |             (xs,ys)     nx                          |
669          |            .                                        |
670          |         (gxs,gys)                                   |
671          |                                                     |
672           -----------------------------------------------------
673   */
674 
675   /*
676          nc - number of components per grid point
677          col - number of colors needed in one direction for single component problem
678 
679   */
680   M   = dd->M;
681   N   = dd->N;
682   P   = dd->P;
683   dim = da->dim;
684   dof = dd->w;
685   /* ierr = DMDAGetInfo(da,&dim,&M,&N,&P,0,0,0,&dof,0,0,0,0,0);CHKERRQ(ierr); */
686   ierr = DMDAGetCorners(da,0,0,0,&nx,&ny,&nz);CHKERRQ(ierr);
687   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
688   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
689   ierr = MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);CHKERRQ(ierr);
690   ierr = MatSetType(A,mtype);CHKERRQ(ierr);
691   ierr = MatSetDM(A,da);CHKERRQ(ierr);
692   if (da->structure_only) {
693     ierr = MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE);CHKERRQ(ierr);
694   }
695   ierr = MatGetType(A,&Atype);CHKERRQ(ierr);
696   /*
697      We do not provide a getmatrix function in the DMDA operations because
698    the basic DMDA does not know about matrices. We think of DMDA as being more
699    more low-level than matrices. This is kind of cheating but, cause sometimes
700    we think of DMDA has higher level than matrices.
701 
702      We could switch based on Atype (or mtype), but we do not since the
703    specialized setting routines depend only the particular preallocation
704    details of the matrix, not the type itself.
705   */
706   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
707   if (!aij) {
708     ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
709   }
710   if (!aij) {
711     ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
712     if (!baij) {
713       ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
714     }
715     if (!baij) {
716       ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
717       if (!sbaij) {
718         ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
719       }
720     }
721   }
722   if (aij) {
723     if (dim == 1) {
724       if (dd->ofill) {
725         ierr = DMCreateMatrix_DA_1d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
726       } else {
727         ierr = DMCreateMatrix_DA_1d_MPIAIJ(da,A);CHKERRQ(ierr);
728       }
729     } else if (dim == 2) {
730       if (dd->ofill) {
731         ierr = DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
732       } else {
733         ierr = DMCreateMatrix_DA_2d_MPIAIJ(da,A);CHKERRQ(ierr);
734       }
735     } else if (dim == 3) {
736       if (dd->ofill) {
737         ierr = DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
738       } else {
739         ierr = DMCreateMatrix_DA_3d_MPIAIJ(da,A);CHKERRQ(ierr);
740       }
741     }
742   } else if (baij) {
743     if (dim == 2) {
744       ierr = DMCreateMatrix_DA_2d_MPIBAIJ(da,A);CHKERRQ(ierr);
745     } else if (dim == 3) {
746       ierr = DMCreateMatrix_DA_3d_MPIBAIJ(da,A);CHKERRQ(ierr);
747     } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
748   } else if (sbaij) {
749     if (dim == 2) {
750       ierr = DMCreateMatrix_DA_2d_MPISBAIJ(da,A);CHKERRQ(ierr);
751     } else if (dim == 3) {
752       ierr = DMCreateMatrix_DA_3d_MPISBAIJ(da,A);CHKERRQ(ierr);
753     } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
754   } else {
755     ISLocalToGlobalMapping ltog;
756     ierr = MatSetBlockSize(A,dof);CHKERRQ(ierr);
757     ierr = MatSetUp(A);CHKERRQ(ierr);
758     ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
759     ierr = MatSetLocalToGlobalMapping(A,ltog,ltog);CHKERRQ(ierr);
760   }
761   ierr = DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);CHKERRQ(ierr);
762   ierr = MatSetStencil(A,dim,dims,starts,dof);CHKERRQ(ierr);
763   ierr = MatSetDM(A,da);CHKERRQ(ierr);
764   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
765   if (size > 1) {
766     /* change viewer to display matrix in natural ordering */
767     ierr = MatShellSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA);CHKERRQ(ierr);
768     ierr = MatShellSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA);CHKERRQ(ierr);
769   }
770   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
771   *J = A;
772   PetscFunctionReturn(0);
773 }
774 
775 /* ---------------------------------------------------------------------------------*/
776 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J)
777 {
778   PetscErrorCode         ierr;
779   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,M,N;
780   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
781   MPI_Comm               comm;
782   PetscScalar            *values;
783   DMBoundaryType         bx,by;
784   ISLocalToGlobalMapping ltog;
785   DMDAStencilType        st;
786   PetscBool              removedups = PETSC_FALSE;
787 
788   PetscFunctionBegin;
789   /*
790          nc - number of components per grid point
791          col - number of colors needed in one direction for single component problem
792 
793   */
794   ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
795   col  = 2*s + 1;
796   /*
797        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
798        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
799   */
800   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
801   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
802   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
803   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
804   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
805 
806   ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr);
807   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
808 
809   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
810   /* determine the matrix preallocation information */
811   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
812   for (i=xs; i<xs+nx; i++) {
813 
814     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
815     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
816 
817     for (j=ys; j<ys+ny; j++) {
818       slot = i - gxs + gnx*(j - gys);
819 
820       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
821       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
822 
823       cnt = 0;
824       for (k=0; k<nc; k++) {
825         for (l=lstart; l<lend+1; l++) {
826           for (p=pstart; p<pend+1; p++) {
827             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
828               cols[cnt++] = k + nc*(slot + gnx*l + p);
829             }
830           }
831         }
832         rows[k] = k + nc*(slot);
833       }
834       if (removedups) {
835         ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
836       } else {
837         ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
838       }
839     }
840   }
841   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
842   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
843   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
844   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
845 
846   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
847 
848   /*
849     For each node in the grid: we get the neighbors in the local (on processor ordering
850     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
851     PETSc ordering.
852   */
853   if (!da->prealloc_only) {
854     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
855     for (i=xs; i<xs+nx; i++) {
856 
857       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
858       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
859 
860       for (j=ys; j<ys+ny; j++) {
861         slot = i - gxs + gnx*(j - gys);
862 
863         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
864         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
865 
866         cnt = 0;
867         for (k=0; k<nc; k++) {
868           for (l=lstart; l<lend+1; l++) {
869             for (p=pstart; p<pend+1; p++) {
870               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
871                 cols[cnt++] = k + nc*(slot + gnx*l + p);
872               }
873             }
874           }
875           rows[k] = k + nc*(slot);
876         }
877         ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
878       }
879     }
880     ierr = PetscFree(values);CHKERRQ(ierr);
881     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
882     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
883     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
884   }
885   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
886   PetscFunctionReturn(0);
887 }
888 
889 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)
890 {
891   PetscErrorCode         ierr;
892   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
893   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,maxcnt = 0,l,p,M,N;
894   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
895   DM_DA                  *dd = (DM_DA*)da->data;
896   PetscInt               ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
897   MPI_Comm               comm;
898   PetscScalar            *values;
899   DMBoundaryType         bx,by;
900   ISLocalToGlobalMapping ltog;
901   DMDAStencilType        st;
902   PetscBool              removedups = PETSC_FALSE;
903 
904   PetscFunctionBegin;
905   /*
906          nc - number of components per grid point
907          col - number of colors needed in one direction for single component problem
908 
909   */
910   ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
911   col  = 2*s + 1;
912   /*
913        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
914        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
915   */
916   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
917   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
918   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
919   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
920   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
921 
922   ierr = PetscMalloc1(col*col*nc,&cols);CHKERRQ(ierr);
923   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
924 
925   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
926   /* determine the matrix preallocation information */
927   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
928   for (i=xs; i<xs+nx; i++) {
929 
930     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
931     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
932 
933     for (j=ys; j<ys+ny; j++) {
934       slot = i - gxs + gnx*(j - gys);
935 
936       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
937       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
938 
939       for (k=0; k<nc; k++) {
940         cnt = 0;
941         for (l=lstart; l<lend+1; l++) {
942           for (p=pstart; p<pend+1; p++) {
943             if (l || p) {
944               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
945                 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
946               }
947             } else {
948               if (dfill) {
949                 for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
950               } else {
951                 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
952               }
953             }
954           }
955         }
956         row    = k + nc*(slot);
957         maxcnt = PetscMax(maxcnt,cnt);
958         if (removedups) {
959           ierr   = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
960         } else {
961           ierr   = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
962         }
963       }
964     }
965   }
966   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
967   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
968   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
969   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
970 
971   /*
972     For each node in the grid: we get the neighbors in the local (on processor ordering
973     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
974     PETSc ordering.
975   */
976   if (!da->prealloc_only) {
977     ierr = PetscCalloc1(maxcnt,&values);CHKERRQ(ierr);
978     for (i=xs; i<xs+nx; i++) {
979 
980       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
981       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
982 
983       for (j=ys; j<ys+ny; j++) {
984         slot = i - gxs + gnx*(j - gys);
985 
986         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
987         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
988 
989         for (k=0; k<nc; k++) {
990           cnt = 0;
991           for (l=lstart; l<lend+1; l++) {
992             for (p=pstart; p<pend+1; p++) {
993               if (l || p) {
994                 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
995                   for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
996                 }
997               } else {
998                 if (dfill) {
999                   for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1000                 } else {
1001                   for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1002                 }
1003               }
1004             }
1005           }
1006           row  = k + nc*(slot);
1007           ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1008         }
1009       }
1010     }
1011     ierr = PetscFree(values);CHKERRQ(ierr);
1012     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1013     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1014     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1015   }
1016   ierr = PetscFree(cols);CHKERRQ(ierr);
1017   PetscFunctionReturn(0);
1018 }
1019 
1020 /* ---------------------------------------------------------------------------------*/
1021 
1022 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J)
1023 {
1024   PetscErrorCode         ierr;
1025   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1026   PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
1027   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
1028   MPI_Comm               comm;
1029   PetscScalar            *values;
1030   DMBoundaryType         bx,by,bz;
1031   ISLocalToGlobalMapping ltog;
1032   DMDAStencilType        st;
1033   PetscBool              removedups = PETSC_FALSE;
1034 
1035   PetscFunctionBegin;
1036   /*
1037          nc - number of components per grid point
1038          col - number of colors needed in one direction for single component problem
1039 
1040   */
1041   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1042   col  = 2*s + 1;
1043 
1044   /*
1045        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1046        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1047   */
1048   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1049   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1050   if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;
1051 
1052   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1053   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1054   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1055 
1056   ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr);
1057   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1058 
1059   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1060   /* determine the matrix preallocation information */
1061   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1062   for (i=xs; i<xs+nx; i++) {
1063     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1064     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1065     for (j=ys; j<ys+ny; j++) {
1066       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1067       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1068       for (k=zs; k<zs+nz; k++) {
1069         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1070         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1071 
1072         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1073 
1074         cnt = 0;
1075         for (l=0; l<nc; l++) {
1076           for (ii=istart; ii<iend+1; ii++) {
1077             for (jj=jstart; jj<jend+1; jj++) {
1078               for (kk=kstart; kk<kend+1; kk++) {
1079                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1080                   cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1081                 }
1082               }
1083             }
1084           }
1085           rows[l] = l + nc*(slot);
1086         }
1087         if (removedups) {
1088           ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1089         } else {
1090           ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1091         }
1092       }
1093     }
1094   }
1095   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1096   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1097   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1098   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1099   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1100 
1101   /*
1102     For each node in the grid: we get the neighbors in the local (on processor ordering
1103     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1104     PETSc ordering.
1105   */
1106   if (!da->prealloc_only) {
1107     ierr = PetscCalloc1(col*col*col*nc*nc*nc,&values);CHKERRQ(ierr);
1108     for (i=xs; i<xs+nx; i++) {
1109       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1110       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1111       for (j=ys; j<ys+ny; j++) {
1112         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1113         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1114         for (k=zs; k<zs+nz; k++) {
1115           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1116           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1117 
1118           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1119 
1120           cnt = 0;
1121           for (l=0; l<nc; l++) {
1122             for (ii=istart; ii<iend+1; ii++) {
1123               for (jj=jstart; jj<jend+1; jj++) {
1124                 for (kk=kstart; kk<kend+1; kk++) {
1125                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1126                     cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1127                   }
1128                 }
1129               }
1130             }
1131             rows[l] = l + nc*(slot);
1132           }
1133           ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1134         }
1135       }
1136     }
1137     ierr = PetscFree(values);CHKERRQ(ierr);
1138     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1139     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1140     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1141   }
1142   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1143   PetscFunctionReturn(0);
1144 }
1145 
1146 /* ---------------------------------------------------------------------------------*/
1147 
1148 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J)
1149 {
1150   PetscErrorCode         ierr;
1151   DM_DA                  *dd = (DM_DA*)da->data;
1152   PetscInt               xs,nx,i,j,gxs,gnx,row,k,l;
1153   PetscInt               m,dim,s,*cols = NULL,nc,cnt,maxcnt = 0,*ocols;
1154   PetscInt               *ofill = dd->ofill,*dfill = dd->dfill;
1155   PetscScalar            *values;
1156   DMBoundaryType         bx;
1157   ISLocalToGlobalMapping ltog;
1158   PetscMPIInt            rank,size;
1159 
1160   PetscFunctionBegin;
1161   if (dd->bx == DM_BOUNDARY_PERIODIC) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"With fill provided not implemented with periodic boundary conditions");
1162   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)da),&rank);CHKERRQ(ierr);
1163   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRQ(ierr);
1164 
1165   /*
1166          nc - number of components per grid point
1167 
1168   */
1169   ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
1170   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
1171   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
1172 
1173   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1174   ierr = PetscCalloc2(nx*nc,&cols,nx*nc,&ocols);CHKERRQ(ierr);
1175 
1176   /*
1177         note should be smaller for first and last process with no periodic
1178         does not handle dfill
1179   */
1180   cnt = 0;
1181   /* coupling with process to the left */
1182   for (i=0; i<s; i++) {
1183     for (j=0; j<nc; j++) {
1184       ocols[cnt] = ((!rank) ? 0 : (s - i)*(ofill[j+1] - ofill[j]));
1185       cols[cnt]  = dfill[j+1] - dfill[j] + (s + i)*(ofill[j+1] - ofill[j]);
1186       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1187       cnt++;
1188     }
1189   }
1190   for (i=s; i<nx-s; i++) {
1191     for (j=0; j<nc; j++) {
1192       cols[cnt] = dfill[j+1] - dfill[j] + 2*s*(ofill[j+1] - ofill[j]);
1193       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1194       cnt++;
1195     }
1196   }
1197   /* coupling with process to the right */
1198   for (i=nx-s; i<nx; i++) {
1199     for (j=0; j<nc; j++) {
1200       ocols[cnt] = ((rank == (size-1)) ? 0 : (i - nx + s + 1)*(ofill[j+1] - ofill[j]));
1201       cols[cnt]  = dfill[j+1] - dfill[j] + (s + nx - i - 1)*(ofill[j+1] - ofill[j]);
1202       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1203       cnt++;
1204     }
1205   }
1206 
1207   ierr = MatSeqAIJSetPreallocation(J,0,cols);CHKERRQ(ierr);
1208   ierr = MatMPIAIJSetPreallocation(J,0,cols,0,ocols);CHKERRQ(ierr);
1209   ierr = PetscFree2(cols,ocols);CHKERRQ(ierr);
1210 
1211   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1212   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1213 
1214   /*
1215     For each node in the grid: we get the neighbors in the local (on processor ordering
1216     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1217     PETSc ordering.
1218   */
1219   if (!da->prealloc_only) {
1220     ierr = PetscCalloc2(maxcnt,&values,maxcnt,&cols);CHKERRQ(ierr);
1221 
1222     row = xs*nc;
1223     /* coupling with process to the left */
1224     for (i=xs; i<xs+s; i++) {
1225       for (j=0; j<nc; j++) {
1226         cnt = 0;
1227         if (rank) {
1228           for (l=0; l<s; l++) {
1229             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1230           }
1231         }
1232         if (dfill) {
1233           for (k=dfill[j]; k<dfill[j+1]; k++) {
1234             cols[cnt++] = i*nc + dfill[k];
1235           }
1236         } else {
1237           for (k=0; k<nc; k++) {
1238             cols[cnt++] = i*nc + k;
1239           }
1240         }
1241         for (l=0; l<s; l++) {
1242           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1243         }
1244         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1245         row++;
1246       }
1247     }
1248     for (i=xs+s; i<xs+nx-s; i++) {
1249       for (j=0; j<nc; j++) {
1250         cnt = 0;
1251         for (l=0; l<s; l++) {
1252           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1253         }
1254         if (dfill) {
1255           for (k=dfill[j]; k<dfill[j+1]; k++) {
1256             cols[cnt++] = i*nc + dfill[k];
1257           }
1258         } else {
1259           for (k=0; k<nc; k++) {
1260             cols[cnt++] = i*nc + k;
1261           }
1262         }
1263         for (l=0; l<s; l++) {
1264           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1265         }
1266         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1267         row++;
1268       }
1269     }
1270     /* coupling with process to the right */
1271     for (i=xs+nx-s; i<xs+nx; i++) {
1272       for (j=0; j<nc; j++) {
1273         cnt = 0;
1274         for (l=0; l<s; l++) {
1275           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1276         }
1277         if (dfill) {
1278           for (k=dfill[j]; k<dfill[j+1]; k++) {
1279             cols[cnt++] = i*nc + dfill[k];
1280           }
1281         } else {
1282           for (k=0; k<nc; k++) {
1283             cols[cnt++] = i*nc + k;
1284           }
1285         }
1286         if (rank < size-1) {
1287           for (l=0; l<s; l++) {
1288             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1289           }
1290         }
1291         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1292         row++;
1293       }
1294     }
1295     ierr = PetscFree2(values,cols);CHKERRQ(ierr);
1296     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1297     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1298     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1299   }
1300   PetscFunctionReturn(0);
1301 }
1302 
1303 /* ---------------------------------------------------------------------------------*/
1304 
1305 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J)
1306 {
1307   PetscErrorCode         ierr;
1308   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1309   PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
1310   PetscInt               istart,iend;
1311   PetscScalar            *values;
1312   DMBoundaryType         bx;
1313   ISLocalToGlobalMapping ltog;
1314 
1315   PetscFunctionBegin;
1316   /*
1317          nc - number of components per grid point
1318          col - number of colors needed in one direction for single component problem
1319 
1320   */
1321   ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
1322   col  = 2*s + 1;
1323 
1324   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
1325   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
1326 
1327   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1328   ierr = MatSeqAIJSetPreallocation(J,col*nc,0);CHKERRQ(ierr);
1329   ierr = MatMPIAIJSetPreallocation(J,col*nc,0,col*nc,0);CHKERRQ(ierr);
1330 
1331   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1332   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1333 
1334   /*
1335     For each node in the grid: we get the neighbors in the local (on processor ordering
1336     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1337     PETSc ordering.
1338   */
1339   if (!da->prealloc_only) {
1340     ierr = PetscMalloc2(nc,&rows,col*nc*nc,&cols);CHKERRQ(ierr);
1341     ierr = PetscCalloc1(col*nc*nc,&values);CHKERRQ(ierr);
1342     for (i=xs; i<xs+nx; i++) {
1343       istart = PetscMax(-s,gxs - i);
1344       iend   = PetscMin(s,gxs + gnx - i - 1);
1345       slot   = i - gxs;
1346 
1347       cnt = 0;
1348       for (l=0; l<nc; l++) {
1349         for (i1=istart; i1<iend+1; i1++) {
1350           cols[cnt++] = l + nc*(slot + i1);
1351         }
1352         rows[l] = l + nc*(slot);
1353       }
1354       ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1355     }
1356     ierr = PetscFree(values);CHKERRQ(ierr);
1357     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1358     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1359     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1360     ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1361   }
1362   PetscFunctionReturn(0);
1363 }
1364 
1365 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
1366 {
1367   PetscErrorCode         ierr;
1368   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1369   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1370   PetscInt               istart,iend,jstart,jend,ii,jj;
1371   MPI_Comm               comm;
1372   PetscScalar            *values;
1373   DMBoundaryType         bx,by;
1374   DMDAStencilType        st;
1375   ISLocalToGlobalMapping ltog;
1376 
1377   PetscFunctionBegin;
1378   /*
1379      nc - number of components per grid point
1380      col - number of colors needed in one direction for single component problem
1381   */
1382   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1383   col  = 2*s + 1;
1384 
1385   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1386   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1387   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1388 
1389   ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr);
1390 
1391   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1392 
1393   /* determine the matrix preallocation information */
1394   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1395   for (i=xs; i<xs+nx; i++) {
1396     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1397     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1398     for (j=ys; j<ys+ny; j++) {
1399       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1400       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1401       slot   = i - gxs + gnx*(j - gys);
1402 
1403       /* Find block columns in block row */
1404       cnt = 0;
1405       for (ii=istart; ii<iend+1; ii++) {
1406         for (jj=jstart; jj<jend+1; jj++) {
1407           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1408             cols[cnt++] = slot + ii + gnx*jj;
1409           }
1410         }
1411       }
1412       ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1413     }
1414   }
1415   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1416   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1417   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1418 
1419   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1420 
1421   /*
1422     For each node in the grid: we get the neighbors in the local (on processor ordering
1423     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1424     PETSc ordering.
1425   */
1426   if (!da->prealloc_only) {
1427     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
1428     for (i=xs; i<xs+nx; i++) {
1429       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1430       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1431       for (j=ys; j<ys+ny; j++) {
1432         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1433         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1434         slot = i - gxs + gnx*(j - gys);
1435         cnt  = 0;
1436         for (ii=istart; ii<iend+1; ii++) {
1437           for (jj=jstart; jj<jend+1; jj++) {
1438             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1439               cols[cnt++] = slot + ii + gnx*jj;
1440             }
1441           }
1442         }
1443         ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1444       }
1445     }
1446     ierr = PetscFree(values);CHKERRQ(ierr);
1447     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1448     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1449     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1450   }
1451   ierr = PetscFree(cols);CHKERRQ(ierr);
1452   PetscFunctionReturn(0);
1453 }
1454 
1455 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
1456 {
1457   PetscErrorCode         ierr;
1458   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1459   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1460   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1461   MPI_Comm               comm;
1462   PetscScalar            *values;
1463   DMBoundaryType         bx,by,bz;
1464   DMDAStencilType        st;
1465   ISLocalToGlobalMapping ltog;
1466 
1467   PetscFunctionBegin;
1468   /*
1469          nc - number of components per grid point
1470          col - number of colors needed in one direction for single component problem
1471 
1472   */
1473   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1474   col  = 2*s + 1;
1475 
1476   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1477   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1478   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1479 
1480   ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr);
1481 
1482   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1483 
1484   /* determine the matrix preallocation information */
1485   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1486   for (i=xs; i<xs+nx; i++) {
1487     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1488     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1489     for (j=ys; j<ys+ny; j++) {
1490       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1491       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1492       for (k=zs; k<zs+nz; k++) {
1493         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1494         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1495 
1496         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1497 
1498         /* Find block columns in block row */
1499         cnt = 0;
1500         for (ii=istart; ii<iend+1; ii++) {
1501           for (jj=jstart; jj<jend+1; jj++) {
1502             for (kk=kstart; kk<kend+1; kk++) {
1503               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1504                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1505               }
1506             }
1507           }
1508         }
1509         ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1510       }
1511     }
1512   }
1513   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1514   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1515   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1516 
1517   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1518 
1519   /*
1520     For each node in the grid: we get the neighbors in the local (on processor ordering
1521     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1522     PETSc ordering.
1523   */
1524   if (!da->prealloc_only) {
1525     ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr);
1526     for (i=xs; i<xs+nx; i++) {
1527       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1528       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1529       for (j=ys; j<ys+ny; j++) {
1530         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1531         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1532         for (k=zs; k<zs+nz; k++) {
1533           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1534           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1535 
1536           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1537 
1538           cnt = 0;
1539           for (ii=istart; ii<iend+1; ii++) {
1540             for (jj=jstart; jj<jend+1; jj++) {
1541               for (kk=kstart; kk<kend+1; kk++) {
1542                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1543                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1544                 }
1545               }
1546             }
1547           }
1548           ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1549         }
1550       }
1551     }
1552     ierr = PetscFree(values);CHKERRQ(ierr);
1553     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1554     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1555     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1556   }
1557   ierr = PetscFree(cols);CHKERRQ(ierr);
1558   PetscFunctionReturn(0);
1559 }
1560 
1561 /*
1562   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
1563   identify in the local ordering with periodic domain.
1564 */
1565 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[])
1566 {
1567   PetscErrorCode ierr;
1568   PetscInt       i,n;
1569 
1570   PetscFunctionBegin;
1571   ierr = ISLocalToGlobalMappingApplyBlock(ltog,1,row,row);CHKERRQ(ierr);
1572   ierr = ISLocalToGlobalMappingApplyBlock(ltog,*cnt,col,col);CHKERRQ(ierr);
1573   for (i=0,n=0; i<*cnt; i++) {
1574     if (col[i] >= *row) col[n++] = col[i];
1575   }
1576   *cnt = n;
1577   PetscFunctionReturn(0);
1578 }
1579 
1580 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J)
1581 {
1582   PetscErrorCode         ierr;
1583   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1584   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1585   PetscInt               istart,iend,jstart,jend,ii,jj;
1586   MPI_Comm               comm;
1587   PetscScalar            *values;
1588   DMBoundaryType         bx,by;
1589   DMDAStencilType        st;
1590   ISLocalToGlobalMapping ltog;
1591 
1592   PetscFunctionBegin;
1593   /*
1594      nc - number of components per grid point
1595      col - number of colors needed in one direction for single component problem
1596   */
1597   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1598   col  = 2*s + 1;
1599 
1600   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1601   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1602   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1603 
1604   ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr);
1605 
1606   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1607 
1608   /* determine the matrix preallocation information */
1609   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1610   for (i=xs; i<xs+nx; i++) {
1611     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1612     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1613     for (j=ys; j<ys+ny; j++) {
1614       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1615       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1616       slot   = i - gxs + gnx*(j - gys);
1617 
1618       /* Find block columns in block row */
1619       cnt = 0;
1620       for (ii=istart; ii<iend+1; ii++) {
1621         for (jj=jstart; jj<jend+1; jj++) {
1622           if (st == DMDA_STENCIL_BOX || !ii || !jj) {
1623             cols[cnt++] = slot + ii + gnx*jj;
1624           }
1625         }
1626       }
1627       ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
1628       ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
1629     }
1630   }
1631   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1632   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1633   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1634 
1635   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1636 
1637   /*
1638     For each node in the grid: we get the neighbors in the local (on processor ordering
1639     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1640     PETSc ordering.
1641   */
1642   if (!da->prealloc_only) {
1643     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
1644     for (i=xs; i<xs+nx; i++) {
1645       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1646       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1647       for (j=ys; j<ys+ny; j++) {
1648         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1649         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1650         slot   = i - gxs + gnx*(j - gys);
1651 
1652         /* Find block columns in block row */
1653         cnt = 0;
1654         for (ii=istart; ii<iend+1; ii++) {
1655           for (jj=jstart; jj<jend+1; jj++) {
1656             if (st == DMDA_STENCIL_BOX || !ii || !jj) {
1657               cols[cnt++] = slot + ii + gnx*jj;
1658             }
1659           }
1660         }
1661         ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
1662         ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1663       }
1664     }
1665     ierr = PetscFree(values);CHKERRQ(ierr);
1666     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1667     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1668     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1669   }
1670   ierr = PetscFree(cols);CHKERRQ(ierr);
1671   PetscFunctionReturn(0);
1672 }
1673 
1674 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J)
1675 {
1676   PetscErrorCode         ierr;
1677   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1678   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1679   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1680   MPI_Comm               comm;
1681   PetscScalar            *values;
1682   DMBoundaryType         bx,by,bz;
1683   DMDAStencilType        st;
1684   ISLocalToGlobalMapping ltog;
1685 
1686   PetscFunctionBegin;
1687   /*
1688      nc - number of components per grid point
1689      col - number of colors needed in one direction for single component problem
1690   */
1691   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1692   col  = 2*s + 1;
1693 
1694   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1695   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1696   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1697 
1698   /* create the matrix */
1699   ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr);
1700 
1701   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1702 
1703   /* determine the matrix preallocation information */
1704   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1705   for (i=xs; i<xs+nx; i++) {
1706     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1707     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1708     for (j=ys; j<ys+ny; j++) {
1709       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1710       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1711       for (k=zs; k<zs+nz; k++) {
1712         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1713         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1714 
1715         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1716 
1717         /* Find block columns in block row */
1718         cnt = 0;
1719         for (ii=istart; ii<iend+1; ii++) {
1720           for (jj=jstart; jj<jend+1; jj++) {
1721             for (kk=kstart; kk<kend+1; kk++) {
1722               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
1723                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1724               }
1725             }
1726           }
1727         }
1728         ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
1729         ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
1730       }
1731     }
1732   }
1733   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1734   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1735   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1736 
1737   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1738 
1739   /*
1740     For each node in the grid: we get the neighbors in the local (on processor ordering
1741     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1742     PETSc ordering.
1743   */
1744   if (!da->prealloc_only) {
1745     ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr);
1746     for (i=xs; i<xs+nx; i++) {
1747       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1748       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1749       for (j=ys; j<ys+ny; j++) {
1750         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1751         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1752         for (k=zs; k<zs+nz; k++) {
1753           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1754           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1755 
1756           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1757 
1758           cnt = 0;
1759           for (ii=istart; ii<iend+1; ii++) {
1760             for (jj=jstart; jj<jend+1; jj++) {
1761               for (kk=kstart; kk<kend+1; kk++) {
1762                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
1763                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1764                 }
1765               }
1766             }
1767           }
1768           ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
1769           ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1770         }
1771       }
1772     }
1773     ierr = PetscFree(values);CHKERRQ(ierr);
1774     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1775     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1776     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1777   }
1778   ierr = PetscFree(cols);CHKERRQ(ierr);
1779   PetscFunctionReturn(0);
1780 }
1781 
1782 /* ---------------------------------------------------------------------------------*/
1783 
1784 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J)
1785 {
1786   PetscErrorCode         ierr;
1787   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1788   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt, maxcnt = 0,l,p,*dnz,*onz;
1789   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
1790   DM_DA                  *dd = (DM_DA*)da->data;
1791   PetscInt               ifill_col,*dfill = dd->dfill,*ofill = dd->ofill;
1792   MPI_Comm               comm;
1793   PetscScalar            *values;
1794   DMBoundaryType         bx,by,bz;
1795   ISLocalToGlobalMapping ltog;
1796   DMDAStencilType        st;
1797   PetscBool              removedups = PETSC_FALSE;
1798 
1799   PetscFunctionBegin;
1800   /*
1801          nc - number of components per grid point
1802          col - number of colors needed in one direction for single component problem
1803 
1804   */
1805   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1806   col  = 2*s + 1;
1807   if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
1808                  by 2*stencil_width + 1\n");
1809   if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
1810                  by 2*stencil_width + 1\n");
1811   if (bz == DM_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
1812                  by 2*stencil_width + 1\n");
1813 
1814   /*
1815        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1816        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1817   */
1818   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1819   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1820   if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;
1821 
1822   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1823   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1824   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1825 
1826   ierr = PetscMalloc1(col*col*col*nc,&cols);CHKERRQ(ierr);
1827   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1828 
1829   /* determine the matrix preallocation information */
1830   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1831 
1832   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1833   for (i=xs; i<xs+nx; i++) {
1834     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1835     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1836     for (j=ys; j<ys+ny; j++) {
1837       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1838       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1839       for (k=zs; k<zs+nz; k++) {
1840         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1841         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1842 
1843         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1844 
1845         for (l=0; l<nc; l++) {
1846           cnt = 0;
1847           for (ii=istart; ii<iend+1; ii++) {
1848             for (jj=jstart; jj<jend+1; jj++) {
1849               for (kk=kstart; kk<kend+1; kk++) {
1850                 if (ii || jj || kk) {
1851                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1852                     for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1853                   }
1854                 } else {
1855                   if (dfill) {
1856                     for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1857                   } else {
1858                     for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1859                   }
1860                 }
1861               }
1862             }
1863           }
1864           row  = l + nc*(slot);
1865           maxcnt = PetscMax(maxcnt,cnt);
1866           if (removedups) {
1867             ierr = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1868           } else {
1869             ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1870           }
1871         }
1872       }
1873     }
1874   }
1875   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1876   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1877   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1878   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1879 
1880   /*
1881     For each node in the grid: we get the neighbors in the local (on processor ordering
1882     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1883     PETSc ordering.
1884   */
1885   if (!da->prealloc_only) {
1886     ierr = PetscCalloc1(maxcnt,&values);CHKERRQ(ierr);
1887     for (i=xs; i<xs+nx; i++) {
1888       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1889       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1890       for (j=ys; j<ys+ny; j++) {
1891         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1892         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1893         for (k=zs; k<zs+nz; k++) {
1894           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1895           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1896 
1897           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1898 
1899           for (l=0; l<nc; l++) {
1900             cnt = 0;
1901             for (ii=istart; ii<iend+1; ii++) {
1902               for (jj=jstart; jj<jend+1; jj++) {
1903                 for (kk=kstart; kk<kend+1; kk++) {
1904                   if (ii || jj || kk) {
1905                     if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1906                       for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1907                     }
1908                   } else {
1909                     if (dfill) {
1910                       for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1911                     } else {
1912                       for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1913                     }
1914                   }
1915                 }
1916               }
1917             }
1918             row  = l + nc*(slot);
1919             ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1920           }
1921         }
1922       }
1923     }
1924     ierr = PetscFree(values);CHKERRQ(ierr);
1925     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1926     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1927     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1928   }
1929   ierr = PetscFree(cols);CHKERRQ(ierr);
1930   PetscFunctionReturn(0);
1931 }
1932