xref: /petsc/src/dm/impls/da/fdda.c (revision 2f6eced2a19e978d64f27de66fbc6c26cc5c7934)
1 
2 #include <petsc/private/dmdaimpl.h> /*I      "petscdmda.h"     I*/
3 #include <petscmat.h>
4 
5 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring*);
6 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring*);
7 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring*);
8 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring*);
9 
10 /*
11    For ghost i that may be negative or greater than the upper bound this
12   maps it into the 0:m-1 range using periodicity
13 */
14 #define SetInRange(i,m) ((i < 0) ? m+i : ((i >= m) ? i-m : i))
15 
16 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill,PetscInt w,PetscInt **rfill)
17 {
18   PetscErrorCode ierr;
19   PetscInt       i,j,nz,*fill;
20 
21   PetscFunctionBegin;
22   if (!dfill) PetscFunctionReturn(0);
23 
24   /* count number nonzeros */
25   nz = 0;
26   for (i=0; i<w; i++) {
27     for (j=0; j<w; j++) {
28       if (dfill[w*i+j]) nz++;
29     }
30   }
31   ierr = PetscMalloc1(nz + w + 1,&fill);CHKERRQ(ierr);
32   /* construct modified CSR storage of nonzero structure */
33   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
34    so fill[1] - fill[0] gives number of nonzeros in first row etc */
35   nz = w + 1;
36   for (i=0; i<w; i++) {
37     fill[i] = nz;
38     for (j=0; j<w; j++) {
39       if (dfill[w*i+j]) {
40         fill[nz] = j;
41         nz++;
42       }
43     }
44   }
45   fill[w] = nz;
46 
47   *rfill = fill;
48   PetscFunctionReturn(0);
49 }
50 
51 /*@
52     DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
53     of the matrix returned by DMCreateMatrix().
54 
55     Logically Collective on DMDA
56 
57     Input Parameter:
58 +   da - the distributed array
59 .   dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
60 -   ofill - the fill pattern in the off-diagonal blocks
61 
62 
63     Level: developer
64 
65     Notes: This only makes sense when you are doing multicomponent problems but using the
66        MPIAIJ matrix format
67 
68            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
69        representing coupling and 0 entries for missing coupling. For example
70 $             dfill[9] = {1, 0, 0,
71 $                         1, 1, 0,
72 $                         0, 1, 1}
73        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
74        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
75        diagonal block).
76 
77      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
78      can be represented in the dfill, ofill format
79 
80    Contributed by Glenn Hammond
81 
82 .seealso DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()
83 
84 @*/
85 PetscErrorCode  DMDASetBlockFills(DM da,const PetscInt *dfill,const PetscInt *ofill)
86 {
87   DM_DA          *dd = (DM_DA*)da->data;
88   PetscErrorCode ierr;
89   PetscInt       i,k,cnt = 1;
90 
91   PetscFunctionBegin;
92   ierr = DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);CHKERRQ(ierr);
93   ierr = DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);CHKERRQ(ierr);
94 
95   /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of
96    columns to the left with any nonzeros in them plus 1 */
97   ierr = PetscCalloc1(dd->w,&dd->ofillcols);CHKERRQ(ierr);
98   for (i=0; i<dd->w; i++) {
99     for (k=dd->ofill[i]; k<dd->ofill[i+1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
100   }
101   for (i=0; i<dd->w; i++) {
102     if (dd->ofillcols[i]) {
103       dd->ofillcols[i] = cnt++;
104     }
105   }
106   PetscFunctionReturn(0);
107 }
108 
109 
110 PetscErrorCode  DMCreateColoring_DA(DM da,ISColoringType ctype,ISColoring *coloring)
111 {
112   PetscErrorCode   ierr;
113   PetscInt         dim,m,n,p,nc;
114   DMBoundaryType bx,by,bz;
115   MPI_Comm         comm;
116   PetscMPIInt      size;
117   PetscBool        isBAIJ;
118   DM_DA            *dd = (DM_DA*)da->data;
119 
120   PetscFunctionBegin;
121   /*
122                                   m
123           ------------------------------------------------------
124          |                                                     |
125          |                                                     |
126          |               ----------------------                |
127          |               |                    |                |
128       n  |           yn  |                    |                |
129          |               |                    |                |
130          |               .---------------------                |
131          |             (xs,ys)     xn                          |
132          |            .                                        |
133          |         (gxs,gys)                                   |
134          |                                                     |
135           -----------------------------------------------------
136   */
137 
138   /*
139          nc - number of components per grid point
140          col - number of colors needed in one direction for single component problem
141 
142   */
143   ierr = DMDAGetInfo(da,&dim,0,0,0,&m,&n,&p,&nc,0,&bx,&by,&bz,0);CHKERRQ(ierr);
144 
145   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
146   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
147   if (ctype == IS_COLORING_LOCAL) {
148     if (size == 1) {
149       ctype = IS_COLORING_GLOBAL;
150     } else if (dim > 1) {
151       if ((m==1 && bx == DM_BOUNDARY_PERIODIC) || (n==1 && by == DM_BOUNDARY_PERIODIC) || (p==1 && bz == DM_BOUNDARY_PERIODIC)) {
152         SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain  on the same process");
153       }
154     }
155   }
156 
157   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
158      matrices is for the blocks, not the individual matrix elements  */
159   ierr = PetscStrcmp(da->mattype,MATBAIJ,&isBAIJ);CHKERRQ(ierr);
160   if (!isBAIJ) {ierr = PetscStrcmp(da->mattype,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);}
161   if (!isBAIJ) {ierr = PetscStrcmp(da->mattype,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);}
162   if (isBAIJ) {
163     dd->w  = 1;
164     dd->xs = dd->xs/nc;
165     dd->xe = dd->xe/nc;
166     dd->Xs = dd->Xs/nc;
167     dd->Xe = dd->Xe/nc;
168   }
169 
170   /*
171      We do not provide a getcoloring function in the DMDA operations because
172    the basic DMDA does not know about matrices. We think of DMDA as being more
173    more low-level then matrices.
174   */
175   if (dim == 1) {
176     ierr = DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
177   } else if (dim == 2) {
178     ierr =  DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
179   } else if (dim == 3) {
180     ierr =  DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
181   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
182   if (isBAIJ) {
183     dd->w  = nc;
184     dd->xs = dd->xs*nc;
185     dd->xe = dd->xe*nc;
186     dd->Xs = dd->Xs*nc;
187     dd->Xe = dd->Xe*nc;
188   }
189   PetscFunctionReturn(0);
190 }
191 
192 /* ---------------------------------------------------------------------------------*/
193 
194 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
195 {
196   PetscErrorCode   ierr;
197   PetscInt         xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
198   PetscInt         ncolors;
199   MPI_Comm         comm;
200   DMBoundaryType bx,by;
201   DMDAStencilType  st;
202   ISColoringValue  *colors;
203   DM_DA            *dd = (DM_DA*)da->data;
204 
205   PetscFunctionBegin;
206   /*
207          nc - number of components per grid point
208          col - number of colors needed in one direction for single component problem
209 
210   */
211   ierr = DMDAGetInfo(da,&dim,&m,&n,0,&M,&N,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
212   col  = 2*s + 1;
213   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
214   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
215   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
216 
217   /* special case as taught to us by Paul Hovland */
218   if (st == DMDA_STENCIL_STAR && s == 1) {
219     ierr = DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
220   } else {
221 
222     if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X (%d) is divisible\n\
223                                                             by 2*stencil_width + 1 (%d)\n", m, col);
224     if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y (%d) is divisible\n\
225                                                             by 2*stencil_width + 1 (%d)\n", n, col);
226     if (ctype == IS_COLORING_GLOBAL) {
227       if (!dd->localcoloring) {
228         ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr);
229         ii   = 0;
230         for (j=ys; j<ys+ny; j++) {
231           for (i=xs; i<xs+nx; i++) {
232             for (k=0; k<nc; k++) {
233               colors[ii++] = k + nc*((i % col) + col*(j % col));
234             }
235           }
236         }
237         ncolors = nc + nc*(col-1 + col*(col-1));
238         ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
239       }
240       *coloring = dd->localcoloring;
241     } else if (ctype == IS_COLORING_LOCAL) {
242       if (!dd->ghostedcoloring) {
243         ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr);
244         ii   = 0;
245         for (j=gys; j<gys+gny; j++) {
246           for (i=gxs; i<gxs+gnx; i++) {
247             for (k=0; k<nc; k++) {
248               /* the complicated stuff is to handle periodic boundaries */
249               colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
250             }
251           }
252         }
253         ncolors = nc + nc*(col - 1 + col*(col-1));
254         ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
255         /* PetscIntView(ncolors,(PetscInt*)colors,0); */
256 
257         ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
258       }
259       *coloring = dd->ghostedcoloring;
260     } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
261   }
262   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
263   PetscFunctionReturn(0);
264 }
265 
266 /* ---------------------------------------------------------------------------------*/
267 
268 PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
269 {
270   PetscErrorCode   ierr;
271   PetscInt         xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
272   PetscInt         ncolors;
273   MPI_Comm         comm;
274   DMBoundaryType bx,by,bz;
275   DMDAStencilType  st;
276   ISColoringValue  *colors;
277   DM_DA            *dd = (DM_DA*)da->data;
278 
279   PetscFunctionBegin;
280   /*
281          nc - number of components per grid point
282          col - number of colors needed in one direction for single component problem
283 
284   */
285   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
286   col  = 2*s + 1;
287   if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
288                                                          by 2*stencil_width + 1\n");
289   if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
290                                                          by 2*stencil_width + 1\n");
291   if (bz == DM_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
292                                                          by 2*stencil_width + 1\n");
293 
294   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
295   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
296   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
297 
298   /* create the coloring */
299   if (ctype == IS_COLORING_GLOBAL) {
300     if (!dd->localcoloring) {
301       ierr = PetscMalloc1(nc*nx*ny*nz,&colors);CHKERRQ(ierr);
302       ii   = 0;
303       for (k=zs; k<zs+nz; k++) {
304         for (j=ys; j<ys+ny; j++) {
305           for (i=xs; i<xs+nx; i++) {
306             for (l=0; l<nc; l++) {
307               colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
308             }
309           }
310         }
311       }
312       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
313       ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
314     }
315     *coloring = dd->localcoloring;
316   } else if (ctype == IS_COLORING_LOCAL) {
317     if (!dd->ghostedcoloring) {
318       ierr = PetscMalloc1(nc*gnx*gny*gnz,&colors);CHKERRQ(ierr);
319       ii   = 0;
320       for (k=gzs; k<gzs+gnz; k++) {
321         for (j=gys; j<gys+gny; j++) {
322           for (i=gxs; i<gxs+gnx; i++) {
323             for (l=0; l<nc; l++) {
324               /* the complicated stuff is to handle periodic boundaries */
325               colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
326             }
327           }
328         }
329       }
330       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
331       ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
332       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
333     }
334     *coloring = dd->ghostedcoloring;
335   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
336   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
337   PetscFunctionReturn(0);
338 }
339 
340 /* ---------------------------------------------------------------------------------*/
341 
342 PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
343 {
344   PetscErrorCode   ierr;
345   PetscInt         xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
346   PetscInt         ncolors;
347   MPI_Comm         comm;
348   DMBoundaryType bx;
349   ISColoringValue  *colors;
350   DM_DA            *dd = (DM_DA*)da->data;
351 
352   PetscFunctionBegin;
353   /*
354          nc - number of components per grid point
355          col - number of colors needed in one direction for single component problem
356 
357   */
358   ierr = DMDAGetInfo(da,&dim,&m,0,0,&M,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
359   col  = 2*s + 1;
360 
361   if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points %d is divisible\n\
362                                                           by 2*stencil_width + 1 %d\n",(int)m,(int)col);
363 
364   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
365   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
366   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
367 
368   /* create the coloring */
369   if (ctype == IS_COLORING_GLOBAL) {
370     if (!dd->localcoloring) {
371       ierr = PetscMalloc1(nc*nx,&colors);CHKERRQ(ierr);
372       if (dd->ofillcols) {
373         PetscInt tc = 0;
374         for (i=0; i<nc; i++) tc += (PetscInt) (dd->ofillcols[i] > 0);
375         i1 = 0;
376         for (i=xs; i<xs+nx; i++) {
377           for (l=0; l<nc; l++) {
378             if (dd->ofillcols[l] && (i % col)) {
379               colors[i1++] =  nc - 1 + tc*((i % col) - 1) + dd->ofillcols[l];
380             } else {
381               colors[i1++] = l;
382             }
383           }
384         }
385         ncolors = nc + 2*s*tc;
386       } else {
387         i1 = 0;
388         for (i=xs; i<xs+nx; i++) {
389           for (l=0; l<nc; l++) {
390             colors[i1++] = l + nc*(i % col);
391           }
392         }
393         ncolors = nc + nc*(col-1);
394       }
395       ierr = ISColoringCreate(comm,ncolors,nc*nx,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
396     }
397     *coloring = dd->localcoloring;
398   } else if (ctype == IS_COLORING_LOCAL) {
399     if (!dd->ghostedcoloring) {
400       ierr = PetscMalloc1(nc*gnx,&colors);CHKERRQ(ierr);
401       i1   = 0;
402       for (i=gxs; i<gxs+gnx; i++) {
403         for (l=0; l<nc; l++) {
404           /* the complicated stuff is to handle periodic boundaries */
405           colors[i1++] = l + nc*(SetInRange(i,m) % col);
406         }
407       }
408       ncolors = nc + nc*(col-1);
409       ierr    = ISColoringCreate(comm,ncolors,nc*gnx,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
410       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
411     }
412     *coloring = dd->ghostedcoloring;
413   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
414   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
415   PetscFunctionReturn(0);
416 }
417 
418 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
419 {
420   PetscErrorCode   ierr;
421   PetscInt         xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
422   PetscInt         ncolors;
423   MPI_Comm         comm;
424   DMBoundaryType bx,by;
425   ISColoringValue  *colors;
426   DM_DA            *dd = (DM_DA*)da->data;
427 
428   PetscFunctionBegin;
429   /*
430          nc - number of components per grid point
431          col - number of colors needed in one direction for single component problem
432 
433   */
434   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,0);CHKERRQ(ierr);
435   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
436   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
437   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
438 
439   if (bx == DM_BOUNDARY_PERIODIC && (m % 5)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible by 5\n");
440   if (by == DM_BOUNDARY_PERIODIC && (n % 5)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible by 5\n");
441 
442   /* create the coloring */
443   if (ctype == IS_COLORING_GLOBAL) {
444     if (!dd->localcoloring) {
445       ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr);
446       ii   = 0;
447       for (j=ys; j<ys+ny; j++) {
448         for (i=xs; i<xs+nx; i++) {
449           for (k=0; k<nc; k++) {
450             colors[ii++] = k + nc*((3*j+i) % 5);
451           }
452         }
453       }
454       ncolors = 5*nc;
455       ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
456     }
457     *coloring = dd->localcoloring;
458   } else if (ctype == IS_COLORING_LOCAL) {
459     if (!dd->ghostedcoloring) {
460       ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr);
461       ii = 0;
462       for (j=gys; j<gys+gny; j++) {
463         for (i=gxs; i<gxs+gnx; i++) {
464           for (k=0; k<nc; k++) {
465             colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
466           }
467         }
468       }
469       ncolors = 5*nc;
470       ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
471       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
472     }
473     *coloring = dd->ghostedcoloring;
474   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
475   PetscFunctionReturn(0);
476 }
477 
478 /* =========================================================================== */
479 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat);
480 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM,Mat);
481 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat);
482 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat);
483 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat);
484 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat);
485 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat);
486 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat);
487 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat);
488 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat);
489 
490 /*@C
491    MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix
492 
493    Logically Collective on Mat
494 
495    Input Parameters:
496 +  mat - the matrix
497 -  da - the da
498 
499    Level: intermediate
500 
501 @*/
502 PetscErrorCode MatSetupDM(Mat mat,DM da)
503 {
504   PetscErrorCode ierr;
505 
506   PetscFunctionBegin;
507   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
508   PetscValidHeaderSpecific(da,DM_CLASSID,1);
509   ierr = PetscTryMethod(mat,"MatSetupDM_C",(Mat,DM),(mat,da));CHKERRQ(ierr);
510   PetscFunctionReturn(0);
511 }
512 
513 PetscErrorCode  MatView_MPI_DA(Mat A,PetscViewer viewer)
514 {
515   DM                da;
516   PetscErrorCode    ierr;
517   const char        *prefix;
518   Mat               Anatural;
519   AO                ao;
520   PetscInt          rstart,rend,*petsc,i;
521   IS                is;
522   MPI_Comm          comm;
523   PetscViewerFormat format;
524 
525   PetscFunctionBegin;
526   /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
527   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
528   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0);
529 
530   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
531   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
532   if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
533 
534   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
535   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
536   ierr = PetscMalloc1(rend-rstart,&petsc);CHKERRQ(ierr);
537   for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
538   ierr = AOApplicationToPetsc(ao,rend-rstart,petsc);CHKERRQ(ierr);
539   ierr = ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
540 
541   /* call viewer on natural ordering */
542   ierr = MatGetSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);CHKERRQ(ierr);
543   ierr = ISDestroy(&is);CHKERRQ(ierr);
544   ierr = PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);CHKERRQ(ierr);
545   ierr = PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);CHKERRQ(ierr);
546   ierr = PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);CHKERRQ(ierr);
547   ierr = MatView(Anatural,viewer);CHKERRQ(ierr);
548   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
549   PetscFunctionReturn(0);
550 }
551 
552 PetscErrorCode  MatLoad_MPI_DA(Mat A,PetscViewer viewer)
553 {
554   DM             da;
555   PetscErrorCode ierr;
556   Mat            Anatural,Aapp;
557   AO             ao;
558   PetscInt       rstart,rend,*app,i,m,n,M,N;
559   IS             is;
560   MPI_Comm       comm;
561 
562   PetscFunctionBegin;
563   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
564   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
565   if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
566 
567   /* Load the matrix in natural ordering */
568   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Anatural);CHKERRQ(ierr);
569   ierr = MatSetType(Anatural,((PetscObject)A)->type_name);CHKERRQ(ierr);
570   ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr);
571   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
572   ierr = MatSetSizes(Anatural,m,n,M,N);CHKERRQ(ierr);
573   ierr = MatLoad(Anatural,viewer);CHKERRQ(ierr);
574 
575   /* Map natural ordering to application ordering and create IS */
576   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
577   ierr = MatGetOwnershipRange(Anatural,&rstart,&rend);CHKERRQ(ierr);
578   ierr = PetscMalloc1(rend-rstart,&app);CHKERRQ(ierr);
579   for (i=rstart; i<rend; i++) app[i-rstart] = i;
580   ierr = AOPetscToApplication(ao,rend-rstart,app);CHKERRQ(ierr);
581   ierr = ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
582 
583   /* Do permutation and replace header */
584   ierr = MatGetSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);CHKERRQ(ierr);
585   ierr = MatHeaderReplace(A,&Aapp);CHKERRQ(ierr);
586   ierr = ISDestroy(&is);CHKERRQ(ierr);
587   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
588   PetscFunctionReturn(0);
589 }
590 
591 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
592 {
593   PetscErrorCode ierr;
594   PetscInt       dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
595   Mat            A;
596   MPI_Comm       comm;
597   MatType        Atype;
598   PetscSection   section, sectionGlobal;
599   void           (*aij)(void)=NULL,(*baij)(void)=NULL,(*sbaij)(void)=NULL;
600   MatType        mtype;
601   PetscMPIInt    size;
602   DM_DA          *dd = (DM_DA*)da->data;
603 
604   PetscFunctionBegin;
605   ierr = MatInitializePackage();CHKERRQ(ierr);
606   mtype = da->mattype;
607 
608   ierr = DMGetDefaultSection(da, &section);CHKERRQ(ierr);
609   if (section) {
610     PetscInt  bs = -1;
611     PetscInt  localSize;
612     PetscBool isShell, isBlock, isSeqBlock, isMPIBlock, isSymBlock, isSymSeqBlock, isSymMPIBlock, isSymmetric;
613 
614     ierr = DMGetDefaultGlobalSection(da, &sectionGlobal);CHKERRQ(ierr);
615     ierr = PetscSectionGetConstrainedStorageSize(sectionGlobal, &localSize);CHKERRQ(ierr);
616     ierr = MatCreate(PetscObjectComm((PetscObject)da), J);CHKERRQ(ierr);
617     ierr = MatSetSizes(*J, localSize, localSize, PETSC_DETERMINE, PETSC_DETERMINE);CHKERRQ(ierr);
618     ierr = MatSetType(*J, mtype);CHKERRQ(ierr);
619     ierr = MatSetFromOptions(*J);CHKERRQ(ierr);
620     ierr = PetscStrcmp(mtype, MATSHELL, &isShell);CHKERRQ(ierr);
621     ierr = PetscStrcmp(mtype, MATBAIJ, &isBlock);CHKERRQ(ierr);
622     ierr = PetscStrcmp(mtype, MATSEQBAIJ, &isSeqBlock);CHKERRQ(ierr);
623     ierr = PetscStrcmp(mtype, MATMPIBAIJ, &isMPIBlock);CHKERRQ(ierr);
624     ierr = PetscStrcmp(mtype, MATSBAIJ, &isSymBlock);CHKERRQ(ierr);
625     ierr = PetscStrcmp(mtype, MATSEQSBAIJ, &isSymSeqBlock);CHKERRQ(ierr);
626     ierr = PetscStrcmp(mtype, MATMPISBAIJ, &isSymMPIBlock);CHKERRQ(ierr);
627     /* Check for symmetric storage */
628     isSymmetric = (PetscBool) (isSymBlock || isSymSeqBlock || isSymMPIBlock);
629     if (isSymmetric) {
630       ierr = MatSetOption(*J, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE);CHKERRQ(ierr);
631     }
632     if (!isShell) {
633       PetscInt *dnz, *onz, *dnzu, *onzu, bsLocal;
634 
635       if (bs < 0) {
636         if (isBlock || isSeqBlock || isMPIBlock || isSymBlock || isSymSeqBlock || isSymMPIBlock) {
637           PetscInt pStart, pEnd, p, dof;
638 
639           ierr = PetscSectionGetChart(sectionGlobal, &pStart, &pEnd);CHKERRQ(ierr);
640           for (p = pStart; p < pEnd; ++p) {
641             ierr = PetscSectionGetDof(sectionGlobal, p, &dof);CHKERRQ(ierr);
642             if (dof) {
643               bs = dof;
644               break;
645             }
646           }
647         } else {
648           bs = 1;
649         }
650         /* Must have same blocksize on all procs (some might have no points) */
651         bsLocal = bs;
652         ierr    = MPIU_Allreduce(&bsLocal, &bs, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)da));CHKERRQ(ierr);
653       }
654       ierr = PetscCalloc4(localSize/bs, &dnz, localSize/bs, &onz, localSize/bs, &dnzu, localSize/bs, &onzu);CHKERRQ(ierr);
655       /* ierr = DMPlexPreallocateOperator(dm, bs, section, sectionGlobal, dnz, onz, dnzu, onzu, *J, fillMatrix);CHKERRQ(ierr); */
656       ierr = PetscFree4(dnz, onz, dnzu, onzu);CHKERRQ(ierr);
657     }
658   }
659   /*
660                                   m
661           ------------------------------------------------------
662          |                                                     |
663          |                                                     |
664          |               ----------------------                |
665          |               |                    |                |
666       n  |           ny  |                    |                |
667          |               |                    |                |
668          |               .---------------------                |
669          |             (xs,ys)     nx                          |
670          |            .                                        |
671          |         (gxs,gys)                                   |
672          |                                                     |
673           -----------------------------------------------------
674   */
675 
676   /*
677          nc - number of components per grid point
678          col - number of colors needed in one direction for single component problem
679 
680   */
681   M   = dd->M;
682   N   = dd->N;
683   P   = dd->P;
684   dim = da->dim;
685   dof = dd->w;
686   /* ierr = DMDAGetInfo(da,&dim,&M,&N,&P,0,0,0,&dof,0,0,0,0,0);CHKERRQ(ierr); */
687   ierr = DMDAGetCorners(da,0,0,0,&nx,&ny,&nz);CHKERRQ(ierr);
688   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
689   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
690   ierr = MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);CHKERRQ(ierr);
691   ierr = MatSetType(A,mtype);CHKERRQ(ierr);
692   ierr = MatSetDM(A,da);CHKERRQ(ierr);
693   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
694   ierr = MatGetType(A,&Atype);CHKERRQ(ierr);
695   /*
696      We do not provide a getmatrix function in the DMDA operations because
697    the basic DMDA does not know about matrices. We think of DMDA as being more
698    more low-level than matrices. This is kind of cheating but, cause sometimes
699    we think of DMDA has higher level than matrices.
700 
701      We could switch based on Atype (or mtype), but we do not since the
702    specialized setting routines depend only the particular preallocation
703    details of the matrix, not the type itself.
704   */
705   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
706   if (!aij) {
707     ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
708   }
709   if (!aij) {
710     ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
711     if (!baij) {
712       ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
713     }
714     if (!baij) {
715       ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
716       if (!sbaij) {
717         ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
718       }
719     }
720   }
721   if (aij) {
722     if (dim == 1) {
723       if (dd->ofill) {
724         ierr = DMCreateMatrix_DA_1d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
725       } else {
726         ierr = DMCreateMatrix_DA_1d_MPIAIJ(da,A);CHKERRQ(ierr);
727       }
728     } else if (dim == 2) {
729       if (dd->ofill) {
730         ierr = DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
731       } else {
732         ierr = DMCreateMatrix_DA_2d_MPIAIJ(da,A);CHKERRQ(ierr);
733       }
734     } else if (dim == 3) {
735       if (dd->ofill) {
736         ierr = DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
737       } else {
738         ierr = DMCreateMatrix_DA_3d_MPIAIJ(da,A);CHKERRQ(ierr);
739       }
740     }
741   } else if (baij) {
742     if (dim == 2) {
743       ierr = DMCreateMatrix_DA_2d_MPIBAIJ(da,A);CHKERRQ(ierr);
744     } else if (dim == 3) {
745       ierr = DMCreateMatrix_DA_3d_MPIBAIJ(da,A);CHKERRQ(ierr);
746     } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
747   } else if (sbaij) {
748     if (dim == 2) {
749       ierr = DMCreateMatrix_DA_2d_MPISBAIJ(da,A);CHKERRQ(ierr);
750     } else if (dim == 3) {
751       ierr = DMCreateMatrix_DA_3d_MPISBAIJ(da,A);CHKERRQ(ierr);
752     } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
753   } else {
754     ISLocalToGlobalMapping ltog;
755     ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
756     ierr = MatSetUp(A);CHKERRQ(ierr);
757     ierr = MatSetLocalToGlobalMapping(A,ltog,ltog);CHKERRQ(ierr);
758   }
759   ierr = DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);CHKERRQ(ierr);
760   ierr = MatSetStencil(A,dim,dims,starts,dof);CHKERRQ(ierr);
761   ierr = MatSetDM(A,da);CHKERRQ(ierr);
762   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
763   if (size > 1) {
764     /* change viewer to display matrix in natural ordering */
765     ierr = MatShellSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA);CHKERRQ(ierr);
766     ierr = MatShellSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA);CHKERRQ(ierr);
767   }
768   *J = A;
769   PetscFunctionReturn(0);
770 }
771 
772 /* ---------------------------------------------------------------------------------*/
773 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J)
774 {
775   PetscErrorCode         ierr;
776   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,M,N;
777   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
778   MPI_Comm               comm;
779   PetscScalar            *values;
780   DMBoundaryType         bx,by;
781   ISLocalToGlobalMapping ltog;
782   DMDAStencilType        st;
783   PetscBool              removedups = PETSC_FALSE;
784 
785   PetscFunctionBegin;
786   /*
787          nc - number of components per grid point
788          col - number of colors needed in one direction for single component problem
789 
790   */
791   ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
792   col  = 2*s + 1;
793   /*
794        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
795        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
796   */
797   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
798   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
799   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
800   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
801   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
802 
803   ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr);
804   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
805 
806   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
807   /* determine the matrix preallocation information */
808   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
809   for (i=xs; i<xs+nx; i++) {
810 
811     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
812     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
813 
814     for (j=ys; j<ys+ny; j++) {
815       slot = i - gxs + gnx*(j - gys);
816 
817       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
818       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
819 
820       cnt = 0;
821       for (k=0; k<nc; k++) {
822         for (l=lstart; l<lend+1; l++) {
823           for (p=pstart; p<pend+1; p++) {
824             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
825               cols[cnt++] = k + nc*(slot + gnx*l + p);
826             }
827           }
828         }
829         rows[k] = k + nc*(slot);
830       }
831       if (removedups) {
832         ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
833       } else {
834         ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
835       }
836     }
837   }
838   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
839   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
840   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
841   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
842 
843   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
844 
845   /*
846     For each node in the grid: we get the neighbors in the local (on processor ordering
847     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
848     PETSc ordering.
849   */
850   if (!da->prealloc_only) {
851     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
852     for (i=xs; i<xs+nx; i++) {
853 
854       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
855       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
856 
857       for (j=ys; j<ys+ny; j++) {
858         slot = i - gxs + gnx*(j - gys);
859 
860         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
861         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
862 
863         cnt = 0;
864         for (k=0; k<nc; k++) {
865           for (l=lstart; l<lend+1; l++) {
866             for (p=pstart; p<pend+1; p++) {
867               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
868                 cols[cnt++] = k + nc*(slot + gnx*l + p);
869               }
870             }
871           }
872           rows[k] = k + nc*(slot);
873         }
874         ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
875       }
876     }
877     ierr = PetscFree(values);CHKERRQ(ierr);
878     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
879     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
880     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
881   }
882   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
883   PetscFunctionReturn(0);
884 }
885 
886 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)
887 {
888   PetscErrorCode         ierr;
889   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
890   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,maxcnt = 0,l,p,M,N;
891   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
892   DM_DA                  *dd = (DM_DA*)da->data;
893   PetscInt               ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
894   MPI_Comm               comm;
895   PetscScalar            *values;
896   DMBoundaryType         bx,by;
897   ISLocalToGlobalMapping ltog;
898   DMDAStencilType        st;
899   PetscBool              removedups = PETSC_FALSE;
900 
901   PetscFunctionBegin;
902   /*
903          nc - number of components per grid point
904          col - number of colors needed in one direction for single component problem
905 
906   */
907   ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
908   col  = 2*s + 1;
909   /*
910        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
911        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
912   */
913   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
914   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
915   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
916   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
917   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
918 
919   ierr = PetscMalloc1(col*col*nc,&cols);CHKERRQ(ierr);
920   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
921 
922   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
923   /* determine the matrix preallocation information */
924   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
925   for (i=xs; i<xs+nx; i++) {
926 
927     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
928     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
929 
930     for (j=ys; j<ys+ny; j++) {
931       slot = i - gxs + gnx*(j - gys);
932 
933       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
934       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
935 
936       for (k=0; k<nc; k++) {
937         cnt = 0;
938         for (l=lstart; l<lend+1; l++) {
939           for (p=pstart; p<pend+1; p++) {
940             if (l || p) {
941               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
942                 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
943               }
944             } else {
945               if (dfill) {
946                 for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
947               } else {
948                 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
949               }
950             }
951           }
952         }
953         row    = k + nc*(slot);
954         maxcnt = PetscMax(maxcnt,cnt);
955         if (removedups) {
956           ierr   = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
957         } else {
958           ierr   = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
959         }
960       }
961     }
962   }
963   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
964   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
965   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
966   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
967 
968   /*
969     For each node in the grid: we get the neighbors in the local (on processor ordering
970     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
971     PETSc ordering.
972   */
973   if (!da->prealloc_only) {
974     ierr = PetscCalloc1(maxcnt,&values);CHKERRQ(ierr);
975     for (i=xs; i<xs+nx; i++) {
976 
977       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
978       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
979 
980       for (j=ys; j<ys+ny; j++) {
981         slot = i - gxs + gnx*(j - gys);
982 
983         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
984         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
985 
986         for (k=0; k<nc; k++) {
987           cnt = 0;
988           for (l=lstart; l<lend+1; l++) {
989             for (p=pstart; p<pend+1; p++) {
990               if (l || p) {
991                 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
992                   for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
993                 }
994               } else {
995                 if (dfill) {
996                   for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
997                 } else {
998                   for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
999                 }
1000               }
1001             }
1002           }
1003           row  = k + nc*(slot);
1004           ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1005         }
1006       }
1007     }
1008     ierr = PetscFree(values);CHKERRQ(ierr);
1009     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1010     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1011     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1012   }
1013   ierr = PetscFree(cols);CHKERRQ(ierr);
1014   PetscFunctionReturn(0);
1015 }
1016 
1017 /* ---------------------------------------------------------------------------------*/
1018 
1019 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J)
1020 {
1021   PetscErrorCode         ierr;
1022   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1023   PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
1024   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
1025   MPI_Comm               comm;
1026   PetscScalar            *values;
1027   DMBoundaryType         bx,by,bz;
1028   ISLocalToGlobalMapping ltog;
1029   DMDAStencilType        st;
1030   PetscBool              removedups = PETSC_FALSE;
1031 
1032   PetscFunctionBegin;
1033   /*
1034          nc - number of components per grid point
1035          col - number of colors needed in one direction for single component problem
1036 
1037   */
1038   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1039   col  = 2*s + 1;
1040 
1041   /*
1042        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1043        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1044   */
1045   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1046   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1047   if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;
1048 
1049   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1050   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1051   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1052 
1053   ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr);
1054   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1055 
1056   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1057   /* determine the matrix preallocation information */
1058   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1059   for (i=xs; i<xs+nx; i++) {
1060     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1061     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1062     for (j=ys; j<ys+ny; j++) {
1063       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1064       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1065       for (k=zs; k<zs+nz; k++) {
1066         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1067         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1068 
1069         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1070 
1071         cnt = 0;
1072         for (l=0; l<nc; l++) {
1073           for (ii=istart; ii<iend+1; ii++) {
1074             for (jj=jstart; jj<jend+1; jj++) {
1075               for (kk=kstart; kk<kend+1; kk++) {
1076                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1077                   cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1078                 }
1079               }
1080             }
1081           }
1082           rows[l] = l + nc*(slot);
1083         }
1084         if (removedups) {
1085           ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1086         } else {
1087           ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1088         }
1089       }
1090     }
1091   }
1092   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1093   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1094   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1095   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1096   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1097 
1098   /*
1099     For each node in the grid: we get the neighbors in the local (on processor ordering
1100     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1101     PETSc ordering.
1102   */
1103   if (!da->prealloc_only) {
1104     ierr = PetscCalloc1(col*col*col*nc*nc*nc,&values);CHKERRQ(ierr);
1105     for (i=xs; i<xs+nx; i++) {
1106       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1107       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1108       for (j=ys; j<ys+ny; j++) {
1109         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1110         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1111         for (k=zs; k<zs+nz; k++) {
1112           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1113           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1114 
1115           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1116 
1117           cnt = 0;
1118           for (l=0; l<nc; l++) {
1119             for (ii=istart; ii<iend+1; ii++) {
1120               for (jj=jstart; jj<jend+1; jj++) {
1121                 for (kk=kstart; kk<kend+1; kk++) {
1122                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1123                     cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1124                   }
1125                 }
1126               }
1127             }
1128             rows[l] = l + nc*(slot);
1129           }
1130           ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1131         }
1132       }
1133     }
1134     ierr = PetscFree(values);CHKERRQ(ierr);
1135     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1136     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1137     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1138   }
1139   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1140   PetscFunctionReturn(0);
1141 }
1142 
1143 /* ---------------------------------------------------------------------------------*/
1144 
1145 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J)
1146 {
1147   PetscErrorCode         ierr;
1148   DM_DA                  *dd = (DM_DA*)da->data;
1149   PetscInt               xs,nx,i,j,gxs,gnx,row,k,l;
1150   PetscInt               m,dim,s,*cols = NULL,nc,cnt,maxcnt = 0,*ocols;
1151   PetscInt               *ofill = dd->ofill,*dfill = dd->dfill;
1152   PetscScalar            *values;
1153   DMBoundaryType         bx;
1154   ISLocalToGlobalMapping ltog;
1155   PetscMPIInt            rank,size;
1156 
1157   PetscFunctionBegin;
1158   if (dd->bx == DM_BOUNDARY_PERIODIC) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"With fill provided not implemented with periodic boundary conditions");
1159   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)da),&rank);CHKERRQ(ierr);
1160   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRQ(ierr);
1161 
1162   /*
1163          nc - number of components per grid point
1164 
1165   */
1166   ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
1167   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
1168   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
1169 
1170   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1171   ierr = PetscCalloc2(nx*nc,&cols,nx*nc,&ocols);CHKERRQ(ierr);
1172 
1173   /*
1174         note should be smaller for first and last process with no periodic
1175         does not handle dfill
1176   */
1177   cnt = 0;
1178   /* coupling with process to the left */
1179   for (i=0; i<s; i++) {
1180     for (j=0; j<nc; j++) {
1181       ocols[cnt] = ((!rank) ? 0 : (s - i)*(ofill[j+1] - ofill[j]));
1182       cols[cnt]  = dfill[j+1] - dfill[j] + (s + i)*(ofill[j+1] - ofill[j]);
1183       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1184       cnt++;
1185     }
1186   }
1187   for (i=s; i<nx-s; i++) {
1188     for (j=0; j<nc; j++) {
1189       cols[cnt] = dfill[j+1] - dfill[j] + 2*s*(ofill[j+1] - ofill[j]);
1190       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1191       cnt++;
1192     }
1193   }
1194   /* coupling with process to the right */
1195   for (i=nx-s; i<nx; i++) {
1196     for (j=0; j<nc; j++) {
1197       ocols[cnt] = ((rank == (size-1)) ? 0 : (i - nx + s + 1)*(ofill[j+1] - ofill[j]));
1198       cols[cnt]  = dfill[j+1] - dfill[j] + (s + nx - i - 1)*(ofill[j+1] - ofill[j]);
1199       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1200       cnt++;
1201     }
1202   }
1203 
1204   ierr = MatSeqAIJSetPreallocation(J,0,cols);CHKERRQ(ierr);
1205   ierr = MatMPIAIJSetPreallocation(J,0,cols,0,ocols);CHKERRQ(ierr);
1206   ierr = PetscFree2(cols,ocols);CHKERRQ(ierr);
1207 
1208   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1209   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1210 
1211   /*
1212     For each node in the grid: we get the neighbors in the local (on processor ordering
1213     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1214     PETSc ordering.
1215   */
1216   if (!da->prealloc_only) {
1217     ierr = PetscCalloc2(maxcnt,&values,maxcnt,&cols);CHKERRQ(ierr);
1218 
1219     row = xs*nc;
1220     /* coupling with process to the left */
1221     for (i=xs; i<xs+s; i++) {
1222       for (j=0; j<nc; j++) {
1223         cnt = 0;
1224         if (rank) {
1225           for (l=0; l<s; l++) {
1226             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1227           }
1228         }
1229         if (dfill) {
1230           for (k=dfill[j]; k<dfill[j+1]; k++) {
1231             cols[cnt++] = i*nc + dfill[k];
1232           }
1233         } else {
1234           for (k=0; k<nc; k++) {
1235             cols[cnt++] = i*nc + k;
1236           }
1237         }
1238         for (l=0; l<s; l++) {
1239           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1240         }
1241         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1242         row++;
1243       }
1244     }
1245     for (i=xs+s; i<xs+nx-s; i++) {
1246       for (j=0; j<nc; j++) {
1247         cnt = 0;
1248         for (l=0; l<s; l++) {
1249           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1250         }
1251         if (dfill) {
1252           for (k=dfill[j]; k<dfill[j+1]; k++) {
1253             cols[cnt++] = i*nc + dfill[k];
1254           }
1255         } else {
1256           for (k=0; k<nc; k++) {
1257             cols[cnt++] = i*nc + k;
1258           }
1259         }
1260         for (l=0; l<s; l++) {
1261           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1262         }
1263         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1264         row++;
1265       }
1266     }
1267     /* coupling with process to the right */
1268     for (i=xs+nx-s; i<xs+nx; i++) {
1269       for (j=0; j<nc; j++) {
1270         cnt = 0;
1271         for (l=0; l<s; l++) {
1272           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1273         }
1274         if (dfill) {
1275           for (k=dfill[j]; k<dfill[j+1]; k++) {
1276             cols[cnt++] = i*nc + dfill[k];
1277           }
1278         } else {
1279           for (k=0; k<nc; k++) {
1280             cols[cnt++] = i*nc + k;
1281           }
1282         }
1283         if (rank < size-1) {
1284           for (l=0; l<s; l++) {
1285             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1286           }
1287         }
1288         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1289         row++;
1290       }
1291     }
1292     ierr = PetscFree2(values,cols);CHKERRQ(ierr);
1293     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1294     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1295     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1296   }
1297   PetscFunctionReturn(0);
1298 }
1299 
1300 /* ---------------------------------------------------------------------------------*/
1301 
1302 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J)
1303 {
1304   PetscErrorCode         ierr;
1305   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1306   PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
1307   PetscInt               istart,iend;
1308   PetscScalar            *values;
1309   DMBoundaryType         bx;
1310   ISLocalToGlobalMapping ltog;
1311 
1312   PetscFunctionBegin;
1313   /*
1314          nc - number of components per grid point
1315          col - number of colors needed in one direction for single component problem
1316 
1317   */
1318   ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
1319   col  = 2*s + 1;
1320 
1321   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
1322   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
1323 
1324   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1325   ierr = MatSeqAIJSetPreallocation(J,col*nc,0);CHKERRQ(ierr);
1326   ierr = MatMPIAIJSetPreallocation(J,col*nc,0,col*nc,0);CHKERRQ(ierr);
1327 
1328   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1329   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1330 
1331   /*
1332     For each node in the grid: we get the neighbors in the local (on processor ordering
1333     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1334     PETSc ordering.
1335   */
1336   if (!da->prealloc_only) {
1337     ierr = PetscMalloc2(nc,&rows,col*nc*nc,&cols);CHKERRQ(ierr);
1338     ierr = PetscCalloc1(col*nc*nc,&values);CHKERRQ(ierr);
1339     for (i=xs; i<xs+nx; i++) {
1340       istart = PetscMax(-s,gxs - i);
1341       iend   = PetscMin(s,gxs + gnx - i - 1);
1342       slot   = i - gxs;
1343 
1344       cnt = 0;
1345       for (l=0; l<nc; l++) {
1346         for (i1=istart; i1<iend+1; i1++) {
1347           cols[cnt++] = l + nc*(slot + i1);
1348         }
1349         rows[l] = l + nc*(slot);
1350       }
1351       ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1352     }
1353     ierr = PetscFree(values);CHKERRQ(ierr);
1354     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1355     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1356     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1357     ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1358   }
1359   PetscFunctionReturn(0);
1360 }
1361 
1362 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
1363 {
1364   PetscErrorCode         ierr;
1365   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1366   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1367   PetscInt               istart,iend,jstart,jend,ii,jj;
1368   MPI_Comm               comm;
1369   PetscScalar            *values;
1370   DMBoundaryType         bx,by;
1371   DMDAStencilType        st;
1372   ISLocalToGlobalMapping ltog;
1373 
1374   PetscFunctionBegin;
1375   /*
1376      nc - number of components per grid point
1377      col - number of colors needed in one direction for single component problem
1378   */
1379   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1380   col  = 2*s + 1;
1381 
1382   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1383   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1384   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1385 
1386   ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr);
1387 
1388   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1389 
1390   /* determine the matrix preallocation information */
1391   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1392   for (i=xs; i<xs+nx; i++) {
1393     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1394     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1395     for (j=ys; j<ys+ny; j++) {
1396       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1397       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1398       slot   = i - gxs + gnx*(j - gys);
1399 
1400       /* Find block columns in block row */
1401       cnt = 0;
1402       for (ii=istart; ii<iend+1; ii++) {
1403         for (jj=jstart; jj<jend+1; jj++) {
1404           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1405             cols[cnt++] = slot + ii + gnx*jj;
1406           }
1407         }
1408       }
1409       ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1410     }
1411   }
1412   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1413   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1414   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1415 
1416   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1417 
1418   /*
1419     For each node in the grid: we get the neighbors in the local (on processor ordering
1420     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1421     PETSc ordering.
1422   */
1423   if (!da->prealloc_only) {
1424     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
1425     for (i=xs; i<xs+nx; i++) {
1426       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1427       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1428       for (j=ys; j<ys+ny; j++) {
1429         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1430         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1431         slot = i - gxs + gnx*(j - gys);
1432         cnt  = 0;
1433         for (ii=istart; ii<iend+1; ii++) {
1434           for (jj=jstart; jj<jend+1; jj++) {
1435             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1436               cols[cnt++] = slot + ii + gnx*jj;
1437             }
1438           }
1439         }
1440         ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1441       }
1442     }
1443     ierr = PetscFree(values);CHKERRQ(ierr);
1444     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1445     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1446     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1447   }
1448   ierr = PetscFree(cols);CHKERRQ(ierr);
1449   PetscFunctionReturn(0);
1450 }
1451 
1452 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
1453 {
1454   PetscErrorCode         ierr;
1455   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1456   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1457   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1458   MPI_Comm               comm;
1459   PetscScalar            *values;
1460   DMBoundaryType         bx,by,bz;
1461   DMDAStencilType        st;
1462   ISLocalToGlobalMapping ltog;
1463 
1464   PetscFunctionBegin;
1465   /*
1466          nc - number of components per grid point
1467          col - number of colors needed in one direction for single component problem
1468 
1469   */
1470   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1471   col  = 2*s + 1;
1472 
1473   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1474   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1475   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1476 
1477   ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr);
1478 
1479   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1480 
1481   /* determine the matrix preallocation information */
1482   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1483   for (i=xs; i<xs+nx; i++) {
1484     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1485     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1486     for (j=ys; j<ys+ny; j++) {
1487       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1488       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1489       for (k=zs; k<zs+nz; k++) {
1490         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1491         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1492 
1493         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1494 
1495         /* Find block columns in block row */
1496         cnt = 0;
1497         for (ii=istart; ii<iend+1; ii++) {
1498           for (jj=jstart; jj<jend+1; jj++) {
1499             for (kk=kstart; kk<kend+1; kk++) {
1500               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1501                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1502               }
1503             }
1504           }
1505         }
1506         ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1507       }
1508     }
1509   }
1510   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1511   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1512   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1513 
1514   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1515 
1516   /*
1517     For each node in the grid: we get the neighbors in the local (on processor ordering
1518     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1519     PETSc ordering.
1520   */
1521   if (!da->prealloc_only) {
1522     ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr);
1523     for (i=xs; i<xs+nx; i++) {
1524       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1525       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1526       for (j=ys; j<ys+ny; j++) {
1527         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1528         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1529         for (k=zs; k<zs+nz; k++) {
1530           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1531           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1532 
1533           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1534 
1535           cnt = 0;
1536           for (ii=istart; ii<iend+1; ii++) {
1537             for (jj=jstart; jj<jend+1; jj++) {
1538               for (kk=kstart; kk<kend+1; kk++) {
1539                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1540                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1541                 }
1542               }
1543             }
1544           }
1545           ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1546         }
1547       }
1548     }
1549     ierr = PetscFree(values);CHKERRQ(ierr);
1550     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1551     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1552     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1553   }
1554   ierr = PetscFree(cols);CHKERRQ(ierr);
1555   PetscFunctionReturn(0);
1556 }
1557 
1558 /*
1559   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
1560   identify in the local ordering with periodic domain.
1561 */
1562 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[])
1563 {
1564   PetscErrorCode ierr;
1565   PetscInt       i,n;
1566 
1567   PetscFunctionBegin;
1568   ierr = ISLocalToGlobalMappingApplyBlock(ltog,1,row,row);CHKERRQ(ierr);
1569   ierr = ISLocalToGlobalMappingApplyBlock(ltog,*cnt,col,col);CHKERRQ(ierr);
1570   for (i=0,n=0; i<*cnt; i++) {
1571     if (col[i] >= *row) col[n++] = col[i];
1572   }
1573   *cnt = n;
1574   PetscFunctionReturn(0);
1575 }
1576 
1577 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J)
1578 {
1579   PetscErrorCode         ierr;
1580   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1581   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1582   PetscInt               istart,iend,jstart,jend,ii,jj;
1583   MPI_Comm               comm;
1584   PetscScalar            *values;
1585   DMBoundaryType         bx,by;
1586   DMDAStencilType        st;
1587   ISLocalToGlobalMapping ltog;
1588 
1589   PetscFunctionBegin;
1590   /*
1591      nc - number of components per grid point
1592      col - number of colors needed in one direction for single component problem
1593   */
1594   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1595   col  = 2*s + 1;
1596 
1597   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1598   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1599   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1600 
1601   ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr);
1602 
1603   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1604 
1605   /* determine the matrix preallocation information */
1606   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1607   for (i=xs; i<xs+nx; i++) {
1608     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1609     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1610     for (j=ys; j<ys+ny; j++) {
1611       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1612       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1613       slot   = i - gxs + gnx*(j - gys);
1614 
1615       /* Find block columns in block row */
1616       cnt = 0;
1617       for (ii=istart; ii<iend+1; ii++) {
1618         for (jj=jstart; jj<jend+1; jj++) {
1619           if (st == DMDA_STENCIL_BOX || !ii || !jj) {
1620             cols[cnt++] = slot + ii + gnx*jj;
1621           }
1622         }
1623       }
1624       ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
1625       ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
1626     }
1627   }
1628   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1629   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1630   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1631 
1632   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1633 
1634   /*
1635     For each node in the grid: we get the neighbors in the local (on processor ordering
1636     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1637     PETSc ordering.
1638   */
1639   if (!da->prealloc_only) {
1640     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
1641     for (i=xs; i<xs+nx; i++) {
1642       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1643       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1644       for (j=ys; j<ys+ny; j++) {
1645         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1646         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1647         slot   = i - gxs + gnx*(j - gys);
1648 
1649         /* Find block columns in block row */
1650         cnt = 0;
1651         for (ii=istart; ii<iend+1; ii++) {
1652           for (jj=jstart; jj<jend+1; jj++) {
1653             if (st == DMDA_STENCIL_BOX || !ii || !jj) {
1654               cols[cnt++] = slot + ii + gnx*jj;
1655             }
1656           }
1657         }
1658         ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
1659         ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1660       }
1661     }
1662     ierr = PetscFree(values);CHKERRQ(ierr);
1663     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1664     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1665     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1666   }
1667   ierr = PetscFree(cols);CHKERRQ(ierr);
1668   PetscFunctionReturn(0);
1669 }
1670 
1671 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J)
1672 {
1673   PetscErrorCode         ierr;
1674   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1675   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1676   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1677   MPI_Comm               comm;
1678   PetscScalar            *values;
1679   DMBoundaryType         bx,by,bz;
1680   DMDAStencilType        st;
1681   ISLocalToGlobalMapping ltog;
1682 
1683   PetscFunctionBegin;
1684   /*
1685      nc - number of components per grid point
1686      col - number of colors needed in one direction for single component problem
1687   */
1688   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1689   col  = 2*s + 1;
1690 
1691   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1692   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1693   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1694 
1695   /* create the matrix */
1696   ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr);
1697 
1698   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1699 
1700   /* determine the matrix preallocation information */
1701   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1702   for (i=xs; i<xs+nx; i++) {
1703     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1704     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1705     for (j=ys; j<ys+ny; j++) {
1706       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1707       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1708       for (k=zs; k<zs+nz; k++) {
1709         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1710         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1711 
1712         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1713 
1714         /* Find block columns in block row */
1715         cnt = 0;
1716         for (ii=istart; ii<iend+1; ii++) {
1717           for (jj=jstart; jj<jend+1; jj++) {
1718             for (kk=kstart; kk<kend+1; kk++) {
1719               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
1720                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1721               }
1722             }
1723           }
1724         }
1725         ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
1726         ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
1727       }
1728     }
1729   }
1730   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1731   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1732   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1733 
1734   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1735 
1736   /*
1737     For each node in the grid: we get the neighbors in the local (on processor ordering
1738     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1739     PETSc ordering.
1740   */
1741   if (!da->prealloc_only) {
1742     ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr);
1743     for (i=xs; i<xs+nx; i++) {
1744       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1745       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1746       for (j=ys; j<ys+ny; j++) {
1747         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1748         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1749         for (k=zs; k<zs+nz; k++) {
1750           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1751           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1752 
1753           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1754 
1755           cnt = 0;
1756           for (ii=istart; ii<iend+1; ii++) {
1757             for (jj=jstart; jj<jend+1; jj++) {
1758               for (kk=kstart; kk<kend+1; kk++) {
1759                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
1760                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1761                 }
1762               }
1763             }
1764           }
1765           ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
1766           ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1767         }
1768       }
1769     }
1770     ierr = PetscFree(values);CHKERRQ(ierr);
1771     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1772     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1773     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1774   }
1775   ierr = PetscFree(cols);CHKERRQ(ierr);
1776   PetscFunctionReturn(0);
1777 }
1778 
1779 /* ---------------------------------------------------------------------------------*/
1780 
1781 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J)
1782 {
1783   PetscErrorCode         ierr;
1784   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1785   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt, maxcnt = 0,l,p,*dnz,*onz;
1786   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
1787   DM_DA                  *dd = (DM_DA*)da->data;
1788   PetscInt               ifill_col,*dfill = dd->dfill,*ofill = dd->ofill;
1789   MPI_Comm               comm;
1790   PetscScalar            *values;
1791   DMBoundaryType         bx,by,bz;
1792   ISLocalToGlobalMapping ltog;
1793   DMDAStencilType        st;
1794   PetscBool              removedups = PETSC_FALSE;
1795 
1796   PetscFunctionBegin;
1797   /*
1798          nc - number of components per grid point
1799          col - number of colors needed in one direction for single component problem
1800 
1801   */
1802   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1803   col  = 2*s + 1;
1804   if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
1805                  by 2*stencil_width + 1\n");
1806   if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
1807                  by 2*stencil_width + 1\n");
1808   if (bz == DM_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
1809                  by 2*stencil_width + 1\n");
1810 
1811   /*
1812        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1813        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1814   */
1815   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1816   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1817   if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;
1818 
1819   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1820   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1821   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1822 
1823   ierr = PetscMalloc1(col*col*col*nc,&cols);CHKERRQ(ierr);
1824   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1825 
1826   /* determine the matrix preallocation information */
1827   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1828 
1829   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1830   for (i=xs; i<xs+nx; i++) {
1831     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1832     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1833     for (j=ys; j<ys+ny; j++) {
1834       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1835       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1836       for (k=zs; k<zs+nz; k++) {
1837         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1838         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1839 
1840         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1841 
1842         for (l=0; l<nc; l++) {
1843           cnt = 0;
1844           for (ii=istart; ii<iend+1; ii++) {
1845             for (jj=jstart; jj<jend+1; jj++) {
1846               for (kk=kstart; kk<kend+1; kk++) {
1847                 if (ii || jj || kk) {
1848                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1849                     for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1850                   }
1851                 } else {
1852                   if (dfill) {
1853                     for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1854                   } else {
1855                     for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1856                   }
1857                 }
1858               }
1859             }
1860           }
1861           row  = l + nc*(slot);
1862           maxcnt = PetscMax(maxcnt,cnt);
1863           if (removedups) {
1864             ierr = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1865           } else {
1866             ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1867           }
1868         }
1869       }
1870     }
1871   }
1872   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1873   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1874   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1875   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1876 
1877   /*
1878     For each node in the grid: we get the neighbors in the local (on processor ordering
1879     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1880     PETSc ordering.
1881   */
1882   if (!da->prealloc_only) {
1883     ierr = PetscCalloc1(maxcnt,&values);CHKERRQ(ierr);
1884     for (i=xs; i<xs+nx; i++) {
1885       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1886       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1887       for (j=ys; j<ys+ny; j++) {
1888         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1889         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1890         for (k=zs; k<zs+nz; k++) {
1891           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1892           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1893 
1894           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1895 
1896           for (l=0; l<nc; l++) {
1897             cnt = 0;
1898             for (ii=istart; ii<iend+1; ii++) {
1899               for (jj=jstart; jj<jend+1; jj++) {
1900                 for (kk=kstart; kk<kend+1; kk++) {
1901                   if (ii || jj || kk) {
1902                     if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1903                       for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1904                     }
1905                   } else {
1906                     if (dfill) {
1907                       for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1908                     } else {
1909                       for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1910                     }
1911                   }
1912                 }
1913               }
1914             }
1915             row  = l + nc*(slot);
1916             ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1917           }
1918         }
1919       }
1920     }
1921     ierr = PetscFree(values);CHKERRQ(ierr);
1922     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1923     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1924     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1925   }
1926   ierr = PetscFree(cols);CHKERRQ(ierr);
1927   PetscFunctionReturn(0);
1928 }
1929