xref: /petsc/src/dm/impls/da/fdda.c (revision abc0d4ab2e6c3c7b2bf7c15384f19f6a0d23fa69)
1 
2 #include <petsc/private/dmdaimpl.h> /*I      "petscdmda.h"     I*/
3 #include <petscmat.h>
4 
5 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring*);
6 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring*);
7 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring*);
8 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring*);
9 
10 /*
11    For ghost i that may be negative or greater than the upper bound this
12   maps it into the 0:m-1 range using periodicity
13 */
14 #define SetInRange(i,m) ((i < 0) ? m+i : ((i >= m) ? i-m : i))
15 
16 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill,PetscInt w,PetscInt **rfill)
17 {
18   PetscErrorCode ierr;
19   PetscInt       i,j,nz,*fill;
20 
21   PetscFunctionBegin;
22   if (!dfill) PetscFunctionReturn(0);
23 
24   /* count number nonzeros */
25   nz = 0;
26   for (i=0; i<w; i++) {
27     for (j=0; j<w; j++) {
28       if (dfill[w*i+j]) nz++;
29     }
30   }
31   ierr = PetscMalloc1(nz + w + 1,&fill);CHKERRQ(ierr);
32   /* construct modified CSR storage of nonzero structure */
33   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
34    so fill[1] - fill[0] gives number of nonzeros in first row etc */
35   nz = w + 1;
36   for (i=0; i<w; i++) {
37     fill[i] = nz;
38     for (j=0; j<w; j++) {
39       if (dfill[w*i+j]) {
40         fill[nz] = j;
41         nz++;
42       }
43     }
44   }
45   fill[w] = nz;
46 
47   *rfill = fill;
48   PetscFunctionReturn(0);
49 }
50 
51 /*@
52     DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
53     of the matrix returned by DMCreateMatrix().
54 
55     Logically Collective on DMDA
56 
57     Input Parameter:
58 +   da - the distributed array
59 .   dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
60 -   ofill - the fill pattern in the off-diagonal blocks
61 
62 
63     Level: developer
64 
65     Notes: This only makes sense when you are doing multicomponent problems but using the
66        MPIAIJ matrix format
67 
68            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
69        representing coupling and 0 entries for missing coupling. For example
70 $             dfill[9] = {1, 0, 0,
71 $                         1, 1, 0,
72 $                         0, 1, 1}
73        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
74        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
75        diagonal block).
76 
77      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
78      can be represented in the dfill, ofill format
79 
80    Contributed by Glenn Hammond
81 
82 .seealso DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()
83 
84 @*/
85 PetscErrorCode  DMDASetBlockFills(DM da,const PetscInt *dfill,const PetscInt *ofill)
86 {
87   DM_DA          *dd = (DM_DA*)da->data;
88   PetscErrorCode ierr;
89   PetscInt       i,k,cnt = 1;
90 
91   PetscFunctionBegin;
92   ierr = DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);CHKERRQ(ierr);
93   ierr = DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);CHKERRQ(ierr);
94 
95   /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of
96    columns to the left with any nonzeros in them plus 1 */
97   ierr = PetscCalloc1(dd->w,&dd->ofillcols);CHKERRQ(ierr);
98   for (i=0; i<dd->w; i++) {
99     for (k=dd->ofill[i]; k<dd->ofill[i+1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
100   }
101   for (i=0; i<dd->w; i++) {
102     if (dd->ofillcols[i]) {
103       dd->ofillcols[i] = cnt++;
104     }
105   }
106   PetscFunctionReturn(0);
107 }
108 
109 
110 PetscErrorCode  DMCreateColoring_DA(DM da,ISColoringType ctype,ISColoring *coloring)
111 {
112   PetscErrorCode   ierr;
113   PetscInt         dim,m,n,p,nc;
114   DMBoundaryType bx,by,bz;
115   MPI_Comm         comm;
116   PetscMPIInt      size;
117   PetscBool        isBAIJ;
118   DM_DA            *dd = (DM_DA*)da->data;
119 
120   PetscFunctionBegin;
121   /*
122                                   m
123           ------------------------------------------------------
124          |                                                     |
125          |                                                     |
126          |               ----------------------                |
127          |               |                    |                |
128       n  |           yn  |                    |                |
129          |               |                    |                |
130          |               .---------------------                |
131          |             (xs,ys)     xn                          |
132          |            .                                        |
133          |         (gxs,gys)                                   |
134          |                                                     |
135           -----------------------------------------------------
136   */
137 
138   /*
139          nc - number of components per grid point
140          col - number of colors needed in one direction for single component problem
141 
142   */
143   ierr = DMDAGetInfo(da,&dim,0,0,0,&m,&n,&p,&nc,0,&bx,&by,&bz,0);CHKERRQ(ierr);
144 
145   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
146   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
147   if (ctype == IS_COLORING_LOCAL) {
148     if (size == 1) {
149       ctype = IS_COLORING_GLOBAL;
150     } else if (dim > 1) {
151       if ((m==1 && bx == DM_BOUNDARY_PERIODIC) || (n==1 && by == DM_BOUNDARY_PERIODIC) || (p==1 && bz == DM_BOUNDARY_PERIODIC)) {
152         SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain  on the same process");
153       }
154     }
155   }
156 
157   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
158      matrices is for the blocks, not the individual matrix elements  */
159   ierr = PetscStrcmp(da->mattype,MATBAIJ,&isBAIJ);CHKERRQ(ierr);
160   if (!isBAIJ) {ierr = PetscStrcmp(da->mattype,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);}
161   if (!isBAIJ) {ierr = PetscStrcmp(da->mattype,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);}
162   if (isBAIJ) {
163     dd->w  = 1;
164     dd->xs = dd->xs/nc;
165     dd->xe = dd->xe/nc;
166     dd->Xs = dd->Xs/nc;
167     dd->Xe = dd->Xe/nc;
168   }
169 
170   /*
171      We do not provide a getcoloring function in the DMDA operations because
172    the basic DMDA does not know about matrices. We think of DMDA as being more
173    more low-level then matrices.
174   */
175   if (dim == 1) {
176     ierr = DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
177   } else if (dim == 2) {
178     ierr =  DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
179   } else if (dim == 3) {
180     ierr =  DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
181   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
182   if (isBAIJ) {
183     dd->w  = nc;
184     dd->xs = dd->xs*nc;
185     dd->xe = dd->xe*nc;
186     dd->Xs = dd->Xs*nc;
187     dd->Xe = dd->Xe*nc;
188   }
189   PetscFunctionReturn(0);
190 }
191 
192 /* ---------------------------------------------------------------------------------*/
193 
194 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
195 {
196   PetscErrorCode   ierr;
197   PetscInt         xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
198   PetscInt         ncolors;
199   MPI_Comm         comm;
200   DMBoundaryType bx,by;
201   DMDAStencilType  st;
202   ISColoringValue  *colors;
203   DM_DA            *dd = (DM_DA*)da->data;
204 
205   PetscFunctionBegin;
206   /*
207          nc - number of components per grid point
208          col - number of colors needed in one direction for single component problem
209 
210   */
211   ierr = DMDAGetInfo(da,&dim,&m,&n,0,&M,&N,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
212   col  = 2*s + 1;
213   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
214   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
215   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
216 
217   /* special case as taught to us by Paul Hovland */
218   if (st == DMDA_STENCIL_STAR && s == 1) {
219     ierr = DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
220   } else {
221 
222     if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X (%d) is divisible\n\
223                                                             by 2*stencil_width + 1 (%d)\n", m, col);
224     if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y (%d) is divisible\n\
225                                                             by 2*stencil_width + 1 (%d)\n", n, col);
226     if (ctype == IS_COLORING_GLOBAL) {
227       if (!dd->localcoloring) {
228         ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr);
229         ii   = 0;
230         for (j=ys; j<ys+ny; j++) {
231           for (i=xs; i<xs+nx; i++) {
232             for (k=0; k<nc; k++) {
233               colors[ii++] = k + nc*((i % col) + col*(j % col));
234             }
235           }
236         }
237         ncolors = nc + nc*(col-1 + col*(col-1));
238         ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
239       }
240       *coloring = dd->localcoloring;
241     } else if (ctype == IS_COLORING_LOCAL) {
242       if (!dd->ghostedcoloring) {
243         ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr);
244         ii   = 0;
245         for (j=gys; j<gys+gny; j++) {
246           for (i=gxs; i<gxs+gnx; i++) {
247             for (k=0; k<nc; k++) {
248               /* the complicated stuff is to handle periodic boundaries */
249               colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
250             }
251           }
252         }
253         ncolors = nc + nc*(col - 1 + col*(col-1));
254         ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
255         /* PetscIntView(ncolors,(PetscInt*)colors,0); */
256 
257         ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
258       }
259       *coloring = dd->ghostedcoloring;
260     } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
261   }
262   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
263   PetscFunctionReturn(0);
264 }
265 
266 /* ---------------------------------------------------------------------------------*/
267 
268 PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
269 {
270   PetscErrorCode   ierr;
271   PetscInt         xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
272   PetscInt         ncolors;
273   MPI_Comm         comm;
274   DMBoundaryType bx,by,bz;
275   DMDAStencilType  st;
276   ISColoringValue  *colors;
277   DM_DA            *dd = (DM_DA*)da->data;
278 
279   PetscFunctionBegin;
280   /*
281          nc - number of components per grid point
282          col - number of colors needed in one direction for single component problem
283 
284   */
285   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
286   col  = 2*s + 1;
287   if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
288                                                          by 2*stencil_width + 1\n");
289   if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
290                                                          by 2*stencil_width + 1\n");
291   if (bz == DM_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
292                                                          by 2*stencil_width + 1\n");
293 
294   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
295   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
296   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
297 
298   /* create the coloring */
299   if (ctype == IS_COLORING_GLOBAL) {
300     if (!dd->localcoloring) {
301       ierr = PetscMalloc1(nc*nx*ny*nz,&colors);CHKERRQ(ierr);
302       ii   = 0;
303       for (k=zs; k<zs+nz; k++) {
304         for (j=ys; j<ys+ny; j++) {
305           for (i=xs; i<xs+nx; i++) {
306             for (l=0; l<nc; l++) {
307               colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
308             }
309           }
310         }
311       }
312       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
313       ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
314     }
315     *coloring = dd->localcoloring;
316   } else if (ctype == IS_COLORING_LOCAL) {
317     if (!dd->ghostedcoloring) {
318       ierr = PetscMalloc1(nc*gnx*gny*gnz,&colors);CHKERRQ(ierr);
319       ii   = 0;
320       for (k=gzs; k<gzs+gnz; k++) {
321         for (j=gys; j<gys+gny; j++) {
322           for (i=gxs; i<gxs+gnx; i++) {
323             for (l=0; l<nc; l++) {
324               /* the complicated stuff is to handle periodic boundaries */
325               colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
326             }
327           }
328         }
329       }
330       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
331       ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
332       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
333     }
334     *coloring = dd->ghostedcoloring;
335   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
336   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
337   PetscFunctionReturn(0);
338 }
339 
340 /* ---------------------------------------------------------------------------------*/
341 
342 PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
343 {
344   PetscErrorCode   ierr;
345   PetscInt         xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
346   PetscInt         ncolors;
347   MPI_Comm         comm;
348   DMBoundaryType bx;
349   ISColoringValue  *colors;
350   DM_DA            *dd = (DM_DA*)da->data;
351 
352   PetscFunctionBegin;
353   /*
354          nc - number of components per grid point
355          col - number of colors needed in one direction for single component problem
356 
357   */
358   ierr = DMDAGetInfo(da,&dim,&m,0,0,&M,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
359   col  = 2*s + 1;
360 
361   if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points %d is divisible\n\
362                                                           by 2*stencil_width + 1 %d\n",(int)m,(int)col);
363 
364   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
365   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
366   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
367 
368   /* create the coloring */
369   if (ctype == IS_COLORING_GLOBAL) {
370     if (!dd->localcoloring) {
371       ierr = PetscMalloc1(nc*nx,&colors);CHKERRQ(ierr);
372       if (dd->ofillcols) {
373         PetscInt tc = 0;
374         for (i=0; i<nc; i++) tc += (PetscInt) (dd->ofillcols[i] > 0);
375         i1 = 0;
376         for (i=xs; i<xs+nx; i++) {
377           for (l=0; l<nc; l++) {
378             if (dd->ofillcols[l] && (i % col)) {
379               colors[i1++] =  nc - 1 + tc*((i % col) - 1) + dd->ofillcols[l];
380             } else {
381               colors[i1++] = l;
382             }
383           }
384         }
385         ncolors = nc + 2*s*tc;
386       } else {
387         i1 = 0;
388         for (i=xs; i<xs+nx; i++) {
389           for (l=0; l<nc; l++) {
390             colors[i1++] = l + nc*(i % col);
391           }
392         }
393         ncolors = nc + nc*(col-1);
394       }
395       ierr = ISColoringCreate(comm,ncolors,nc*nx,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
396     }
397     *coloring = dd->localcoloring;
398   } else if (ctype == IS_COLORING_LOCAL) {
399     if (!dd->ghostedcoloring) {
400       ierr = PetscMalloc1(nc*gnx,&colors);CHKERRQ(ierr);
401       i1   = 0;
402       for (i=gxs; i<gxs+gnx; i++) {
403         for (l=0; l<nc; l++) {
404           /* the complicated stuff is to handle periodic boundaries */
405           colors[i1++] = l + nc*(SetInRange(i,m) % col);
406         }
407       }
408       ncolors = nc + nc*(col-1);
409       ierr    = ISColoringCreate(comm,ncolors,nc*gnx,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
410       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
411     }
412     *coloring = dd->ghostedcoloring;
413   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
414   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
415   PetscFunctionReturn(0);
416 }
417 
418 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
419 {
420   PetscErrorCode   ierr;
421   PetscInt         xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
422   PetscInt         ncolors;
423   MPI_Comm         comm;
424   DMBoundaryType bx,by;
425   ISColoringValue  *colors;
426   DM_DA            *dd = (DM_DA*)da->data;
427 
428   PetscFunctionBegin;
429   /*
430          nc - number of components per grid point
431          col - number of colors needed in one direction for single component problem
432 
433   */
434   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,0);CHKERRQ(ierr);
435   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
436   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
437   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
438 
439   if (bx == DM_BOUNDARY_PERIODIC && (m % 5)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible by 5\n");
440   if (by == DM_BOUNDARY_PERIODIC && (n % 5)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible by 5\n");
441 
442   /* create the coloring */
443   if (ctype == IS_COLORING_GLOBAL) {
444     if (!dd->localcoloring) {
445       ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr);
446       ii   = 0;
447       for (j=ys; j<ys+ny; j++) {
448         for (i=xs; i<xs+nx; i++) {
449           for (k=0; k<nc; k++) {
450             colors[ii++] = k + nc*((3*j+i) % 5);
451           }
452         }
453       }
454       ncolors = 5*nc;
455       ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
456     }
457     *coloring = dd->localcoloring;
458   } else if (ctype == IS_COLORING_LOCAL) {
459     if (!dd->ghostedcoloring) {
460       ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr);
461       ii = 0;
462       for (j=gys; j<gys+gny; j++) {
463         for (i=gxs; i<gxs+gnx; i++) {
464           for (k=0; k<nc; k++) {
465             colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
466           }
467         }
468       }
469       ncolors = 5*nc;
470       ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
471       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
472     }
473     *coloring = dd->ghostedcoloring;
474   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
475   PetscFunctionReturn(0);
476 }
477 
478 /* =========================================================================== */
479 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat);
480 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM,Mat);
481 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat);
482 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat);
483 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat);
484 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat);
485 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat);
486 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat);
487 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat);
488 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat);
489 
490 /*@C
491    MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix
492 
493    Logically Collective on Mat
494 
495    Input Parameters:
496 +  mat - the matrix
497 -  da - the da
498 
499    Level: intermediate
500 
501 @*/
502 PetscErrorCode MatSetupDM(Mat mat,DM da)
503 {
504   PetscErrorCode ierr;
505 
506   PetscFunctionBegin;
507   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
508   PetscValidHeaderSpecific(da,DM_CLASSID,1);
509   ierr = PetscTryMethod(mat,"MatSetupDM_C",(Mat,DM),(mat,da));CHKERRQ(ierr);
510   PetscFunctionReturn(0);
511 }
512 
513 PetscErrorCode  MatView_MPI_DA(Mat A,PetscViewer viewer)
514 {
515   DM                da;
516   PetscErrorCode    ierr;
517   const char        *prefix;
518   Mat               Anatural;
519   AO                ao;
520   PetscInt          rstart,rend,*petsc,i;
521   IS                is;
522   MPI_Comm          comm;
523   PetscViewerFormat format;
524 
525   PetscFunctionBegin;
526   /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
527   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
528   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0);
529 
530   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
531   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
532   if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
533 
534   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
535   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
536   ierr = PetscMalloc1(rend-rstart,&petsc);CHKERRQ(ierr);
537   for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
538   ierr = AOApplicationToPetsc(ao,rend-rstart,petsc);CHKERRQ(ierr);
539   ierr = ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
540 
541   /* call viewer on natural ordering */
542   ierr = MatCreateSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);CHKERRQ(ierr);
543   ierr = ISDestroy(&is);CHKERRQ(ierr);
544   ierr = PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);CHKERRQ(ierr);
545   ierr = PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);CHKERRQ(ierr);
546   ierr = PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);CHKERRQ(ierr);
547   ierr = MatView(Anatural,viewer);CHKERRQ(ierr);
548   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
549   PetscFunctionReturn(0);
550 }
551 
552 PetscErrorCode  MatLoad_MPI_DA(Mat A,PetscViewer viewer)
553 {
554   DM             da;
555   PetscErrorCode ierr;
556   Mat            Anatural,Aapp;
557   AO             ao;
558   PetscInt       rstart,rend,*app,i,m,n,M,N;
559   IS             is;
560   MPI_Comm       comm;
561 
562   PetscFunctionBegin;
563   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
564   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
565   if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
566 
567   /* Load the matrix in natural ordering */
568   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Anatural);CHKERRQ(ierr);
569   ierr = MatSetType(Anatural,((PetscObject)A)->type_name);CHKERRQ(ierr);
570   ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr);
571   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
572   ierr = MatSetSizes(Anatural,m,n,M,N);CHKERRQ(ierr);
573   ierr = MatLoad(Anatural,viewer);CHKERRQ(ierr);
574 
575   /* Map natural ordering to application ordering and create IS */
576   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
577   ierr = MatGetOwnershipRange(Anatural,&rstart,&rend);CHKERRQ(ierr);
578   ierr = PetscMalloc1(rend-rstart,&app);CHKERRQ(ierr);
579   for (i=rstart; i<rend; i++) app[i-rstart] = i;
580   ierr = AOPetscToApplication(ao,rend-rstart,app);CHKERRQ(ierr);
581   ierr = ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
582 
583   /* Do permutation and replace header */
584   ierr = MatCreateSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);CHKERRQ(ierr);
585   ierr = MatHeaderReplace(A,&Aapp);CHKERRQ(ierr);
586   ierr = ISDestroy(&is);CHKERRQ(ierr);
587   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
588   PetscFunctionReturn(0);
589 }
590 
591 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
592 {
593   PetscErrorCode ierr;
594   PetscInt       dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
595   Mat            A;
596   MPI_Comm       comm;
597   MatType        Atype;
598   PetscSection   section, sectionGlobal;
599   void           (*aij)(void)=NULL,(*baij)(void)=NULL,(*sbaij)(void)=NULL;
600   MatType        mtype;
601   PetscMPIInt    size;
602   DM_DA          *dd = (DM_DA*)da->data;
603 
604   PetscFunctionBegin;
605   ierr = MatInitializePackage();CHKERRQ(ierr);
606   mtype = da->mattype;
607 
608   ierr = DMGetDefaultSection(da, &section);CHKERRQ(ierr);
609   if (section) {
610     PetscInt  bs = -1;
611     PetscInt  localSize;
612     PetscBool isShell, isBlock, isSeqBlock, isMPIBlock, isSymBlock, isSymSeqBlock, isSymMPIBlock, isSymmetric;
613 
614     ierr = DMGetDefaultGlobalSection(da, &sectionGlobal);CHKERRQ(ierr);
615     ierr = PetscSectionGetConstrainedStorageSize(sectionGlobal, &localSize);CHKERRQ(ierr);
616     ierr = MatCreate(PetscObjectComm((PetscObject)da),&A);CHKERRQ(ierr);
617     ierr = MatSetSizes(A,localSize,localSize,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
618     ierr = MatSetType(A,mtype);CHKERRQ(ierr);
619     ierr = PetscStrcmp(mtype,MATSHELL,&isShell);CHKERRQ(ierr);
620     ierr = PetscStrcmp(mtype,MATBAIJ,&isBlock);CHKERRQ(ierr);
621     ierr = PetscStrcmp(mtype,MATSEQBAIJ,&isSeqBlock);CHKERRQ(ierr);
622     ierr = PetscStrcmp(mtype,MATMPIBAIJ,&isMPIBlock);CHKERRQ(ierr);
623     ierr = PetscStrcmp(mtype,MATSBAIJ,&isSymBlock);CHKERRQ(ierr);
624     ierr = PetscStrcmp(mtype,MATSEQSBAIJ,&isSymSeqBlock);CHKERRQ(ierr);
625     ierr = PetscStrcmp(mtype,MATMPISBAIJ,&isSymMPIBlock);CHKERRQ(ierr);
626     /* Check for symmetric storage */
627     isSymmetric = (PetscBool) (isSymBlock || isSymSeqBlock || isSymMPIBlock);
628     if (isSymmetric) {
629       ierr = MatSetOption(*J, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE);CHKERRQ(ierr);
630     }
631     if (!isShell) {
632       PetscInt *dnz, *onz, *dnzu, *onzu, bsLocal;
633 
634       if (bs < 0) {
635         if (isBlock || isSeqBlock || isMPIBlock || isSymBlock || isSymSeqBlock || isSymMPIBlock) {
636           PetscInt pStart, pEnd, p, dof;
637 
638           ierr = PetscSectionGetChart(sectionGlobal, &pStart, &pEnd);CHKERRQ(ierr);
639           for (p = pStart; p < pEnd; ++p) {
640             ierr = PetscSectionGetDof(sectionGlobal, p, &dof);CHKERRQ(ierr);
641             if (dof) {
642               bs = dof;
643               break;
644             }
645           }
646         } else {
647           bs = 1;
648         }
649         /* Must have same blocksize on all procs (some might have no points) */
650         bsLocal = bs;
651         ierr    = MPIU_Allreduce(&bsLocal, &bs, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)da));CHKERRQ(ierr);
652       }
653       ierr = PetscCalloc4(localSize/bs, &dnz, localSize/bs, &onz, localSize/bs, &dnzu, localSize/bs, &onzu);CHKERRQ(ierr);
654       /* ierr = DMPlexPreallocateOperator(dm, bs, section, sectionGlobal, dnz, onz, dnzu, onzu, *J, fillMatrix);CHKERRQ(ierr); */
655       ierr = PetscFree4(dnz, onz, dnzu, onzu);CHKERRQ(ierr);
656     }
657   }
658   /*
659                                   m
660           ------------------------------------------------------
661          |                                                     |
662          |                                                     |
663          |               ----------------------                |
664          |               |                    |                |
665       n  |           ny  |                    |                |
666          |               |                    |                |
667          |               .---------------------                |
668          |             (xs,ys)     nx                          |
669          |            .                                        |
670          |         (gxs,gys)                                   |
671          |                                                     |
672           -----------------------------------------------------
673   */
674 
675   /*
676          nc - number of components per grid point
677          col - number of colors needed in one direction for single component problem
678 
679   */
680   M   = dd->M;
681   N   = dd->N;
682   P   = dd->P;
683   dim = da->dim;
684   dof = dd->w;
685   /* ierr = DMDAGetInfo(da,&dim,&M,&N,&P,0,0,0,&dof,0,0,0,0,0);CHKERRQ(ierr); */
686   ierr = DMDAGetCorners(da,0,0,0,&nx,&ny,&nz);CHKERRQ(ierr);
687   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
688   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
689   ierr = MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);CHKERRQ(ierr);
690   ierr = MatSetType(A,mtype);CHKERRQ(ierr);
691   ierr = MatSetDM(A,da);CHKERRQ(ierr);
692   if (da->structure_only) {
693     ierr = MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE);CHKERRQ(ierr);
694   }
695   ierr = MatGetType(A,&Atype);CHKERRQ(ierr);
696   /*
697      We do not provide a getmatrix function in the DMDA operations because
698    the basic DMDA does not know about matrices. We think of DMDA as being more
699    more low-level than matrices. This is kind of cheating but, cause sometimes
700    we think of DMDA has higher level than matrices.
701 
702      We could switch based on Atype (or mtype), but we do not since the
703    specialized setting routines depend only the particular preallocation
704    details of the matrix, not the type itself.
705   */
706   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
707   if (!aij) {
708     ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
709   }
710   if (!aij) {
711     ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
712     if (!baij) {
713       ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
714     }
715     if (!baij) {
716       ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
717       if (!sbaij) {
718         ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
719       }
720     }
721   }
722   if (aij) {
723     if (dim == 1) {
724       if (dd->ofill) {
725         ierr = DMCreateMatrix_DA_1d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
726       } else {
727         ierr = DMCreateMatrix_DA_1d_MPIAIJ(da,A);CHKERRQ(ierr);
728       }
729     } else if (dim == 2) {
730       if (dd->ofill) {
731         ierr = DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
732       } else {
733         ierr = DMCreateMatrix_DA_2d_MPIAIJ(da,A);CHKERRQ(ierr);
734       }
735     } else if (dim == 3) {
736       if (dd->ofill) {
737         ierr = DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
738       } else {
739         ierr = DMCreateMatrix_DA_3d_MPIAIJ(da,A);CHKERRQ(ierr);
740       }
741     }
742   } else if (baij) {
743     if (dim == 2) {
744       ierr = DMCreateMatrix_DA_2d_MPIBAIJ(da,A);CHKERRQ(ierr);
745     } else if (dim == 3) {
746       ierr = DMCreateMatrix_DA_3d_MPIBAIJ(da,A);CHKERRQ(ierr);
747     } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
748   } else if (sbaij) {
749     if (dim == 2) {
750       ierr = DMCreateMatrix_DA_2d_MPISBAIJ(da,A);CHKERRQ(ierr);
751     } else if (dim == 3) {
752       ierr = DMCreateMatrix_DA_3d_MPISBAIJ(da,A);CHKERRQ(ierr);
753     } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
754   } else {
755     ISLocalToGlobalMapping ltog;
756     ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
757     ierr = MatSetUp(A);CHKERRQ(ierr);
758     ierr = MatSetLocalToGlobalMapping(A,ltog,ltog);CHKERRQ(ierr);
759   }
760   ierr = DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);CHKERRQ(ierr);
761   ierr = MatSetStencil(A,dim,dims,starts,dof);CHKERRQ(ierr);
762   ierr = MatSetDM(A,da);CHKERRQ(ierr);
763   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
764   if (size > 1) {
765     /* change viewer to display matrix in natural ordering */
766     ierr = MatShellSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA);CHKERRQ(ierr);
767     ierr = MatShellSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA);CHKERRQ(ierr);
768   }
769   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
770   *J = A;
771   PetscFunctionReturn(0);
772 }
773 
774 /* ---------------------------------------------------------------------------------*/
775 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J)
776 {
777   PetscErrorCode         ierr;
778   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,M,N;
779   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
780   MPI_Comm               comm;
781   PetscScalar            *values;
782   DMBoundaryType         bx,by;
783   ISLocalToGlobalMapping ltog;
784   DMDAStencilType        st;
785   PetscBool              removedups = PETSC_FALSE;
786 
787   PetscFunctionBegin;
788   /*
789          nc - number of components per grid point
790          col - number of colors needed in one direction for single component problem
791 
792   */
793   ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
794   col  = 2*s + 1;
795   /*
796        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
797        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
798   */
799   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
800   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
801   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
802   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
803   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
804 
805   ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr);
806   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
807 
808   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
809   /* determine the matrix preallocation information */
810   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
811   for (i=xs; i<xs+nx; i++) {
812 
813     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
814     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
815 
816     for (j=ys; j<ys+ny; j++) {
817       slot = i - gxs + gnx*(j - gys);
818 
819       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
820       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
821 
822       cnt = 0;
823       for (k=0; k<nc; k++) {
824         for (l=lstart; l<lend+1; l++) {
825           for (p=pstart; p<pend+1; p++) {
826             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
827               cols[cnt++] = k + nc*(slot + gnx*l + p);
828             }
829           }
830         }
831         rows[k] = k + nc*(slot);
832       }
833       if (removedups) {
834         ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
835       } else {
836         ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
837       }
838     }
839   }
840   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
841   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
842   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
843   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
844 
845   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
846 
847   /*
848     For each node in the grid: we get the neighbors in the local (on processor ordering
849     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
850     PETSc ordering.
851   */
852   if (!da->prealloc_only) {
853     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
854     for (i=xs; i<xs+nx; i++) {
855 
856       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
857       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
858 
859       for (j=ys; j<ys+ny; j++) {
860         slot = i - gxs + gnx*(j - gys);
861 
862         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
863         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
864 
865         cnt = 0;
866         for (k=0; k<nc; k++) {
867           for (l=lstart; l<lend+1; l++) {
868             for (p=pstart; p<pend+1; p++) {
869               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
870                 cols[cnt++] = k + nc*(slot + gnx*l + p);
871               }
872             }
873           }
874           rows[k] = k + nc*(slot);
875         }
876         ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
877       }
878     }
879     ierr = PetscFree(values);CHKERRQ(ierr);
880     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
881     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
882     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
883   }
884   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
885   PetscFunctionReturn(0);
886 }
887 
888 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)
889 {
890   PetscErrorCode         ierr;
891   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
892   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,maxcnt = 0,l,p,M,N;
893   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
894   DM_DA                  *dd = (DM_DA*)da->data;
895   PetscInt               ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
896   MPI_Comm               comm;
897   PetscScalar            *values;
898   DMBoundaryType         bx,by;
899   ISLocalToGlobalMapping ltog;
900   DMDAStencilType        st;
901   PetscBool              removedups = PETSC_FALSE;
902 
903   PetscFunctionBegin;
904   /*
905          nc - number of components per grid point
906          col - number of colors needed in one direction for single component problem
907 
908   */
909   ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
910   col  = 2*s + 1;
911   /*
912        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
913        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
914   */
915   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
916   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
917   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
918   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
919   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
920 
921   ierr = PetscMalloc1(col*col*nc,&cols);CHKERRQ(ierr);
922   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
923 
924   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
925   /* determine the matrix preallocation information */
926   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
927   for (i=xs; i<xs+nx; i++) {
928 
929     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
930     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
931 
932     for (j=ys; j<ys+ny; j++) {
933       slot = i - gxs + gnx*(j - gys);
934 
935       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
936       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
937 
938       for (k=0; k<nc; k++) {
939         cnt = 0;
940         for (l=lstart; l<lend+1; l++) {
941           for (p=pstart; p<pend+1; p++) {
942             if (l || p) {
943               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
944                 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
945               }
946             } else {
947               if (dfill) {
948                 for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
949               } else {
950                 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
951               }
952             }
953           }
954         }
955         row    = k + nc*(slot);
956         maxcnt = PetscMax(maxcnt,cnt);
957         if (removedups) {
958           ierr   = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
959         } else {
960           ierr   = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
961         }
962       }
963     }
964   }
965   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
966   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
967   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
968   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
969 
970   /*
971     For each node in the grid: we get the neighbors in the local (on processor ordering
972     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
973     PETSc ordering.
974   */
975   if (!da->prealloc_only) {
976     ierr = PetscCalloc1(maxcnt,&values);CHKERRQ(ierr);
977     for (i=xs; i<xs+nx; i++) {
978 
979       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
980       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
981 
982       for (j=ys; j<ys+ny; j++) {
983         slot = i - gxs + gnx*(j - gys);
984 
985         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
986         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
987 
988         for (k=0; k<nc; k++) {
989           cnt = 0;
990           for (l=lstart; l<lend+1; l++) {
991             for (p=pstart; p<pend+1; p++) {
992               if (l || p) {
993                 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
994                   for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
995                 }
996               } else {
997                 if (dfill) {
998                   for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
999                 } else {
1000                   for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1001                 }
1002               }
1003             }
1004           }
1005           row  = k + nc*(slot);
1006           ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1007         }
1008       }
1009     }
1010     ierr = PetscFree(values);CHKERRQ(ierr);
1011     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1012     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1013     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1014   }
1015   ierr = PetscFree(cols);CHKERRQ(ierr);
1016   PetscFunctionReturn(0);
1017 }
1018 
1019 /* ---------------------------------------------------------------------------------*/
1020 
1021 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J)
1022 {
1023   PetscErrorCode         ierr;
1024   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1025   PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
1026   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
1027   MPI_Comm               comm;
1028   PetscScalar            *values;
1029   DMBoundaryType         bx,by,bz;
1030   ISLocalToGlobalMapping ltog;
1031   DMDAStencilType        st;
1032   PetscBool              removedups = PETSC_FALSE;
1033 
1034   PetscFunctionBegin;
1035   /*
1036          nc - number of components per grid point
1037          col - number of colors needed in one direction for single component problem
1038 
1039   */
1040   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1041   col  = 2*s + 1;
1042 
1043   /*
1044        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1045        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1046   */
1047   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1048   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1049   if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;
1050 
1051   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1052   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1053   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1054 
1055   ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr);
1056   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1057 
1058   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1059   /* determine the matrix preallocation information */
1060   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1061   for (i=xs; i<xs+nx; i++) {
1062     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1063     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1064     for (j=ys; j<ys+ny; j++) {
1065       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1066       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1067       for (k=zs; k<zs+nz; k++) {
1068         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1069         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1070 
1071         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1072 
1073         cnt = 0;
1074         for (l=0; l<nc; l++) {
1075           for (ii=istart; ii<iend+1; ii++) {
1076             for (jj=jstart; jj<jend+1; jj++) {
1077               for (kk=kstart; kk<kend+1; kk++) {
1078                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1079                   cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1080                 }
1081               }
1082             }
1083           }
1084           rows[l] = l + nc*(slot);
1085         }
1086         if (removedups) {
1087           ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1088         } else {
1089           ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1090         }
1091       }
1092     }
1093   }
1094   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1095   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1096   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1097   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1098   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1099 
1100   /*
1101     For each node in the grid: we get the neighbors in the local (on processor ordering
1102     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1103     PETSc ordering.
1104   */
1105   if (!da->prealloc_only) {
1106     ierr = PetscCalloc1(col*col*col*nc*nc*nc,&values);CHKERRQ(ierr);
1107     for (i=xs; i<xs+nx; i++) {
1108       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1109       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1110       for (j=ys; j<ys+ny; j++) {
1111         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1112         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1113         for (k=zs; k<zs+nz; k++) {
1114           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1115           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1116 
1117           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1118 
1119           cnt = 0;
1120           for (l=0; l<nc; l++) {
1121             for (ii=istart; ii<iend+1; ii++) {
1122               for (jj=jstart; jj<jend+1; jj++) {
1123                 for (kk=kstart; kk<kend+1; kk++) {
1124                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1125                     cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1126                   }
1127                 }
1128               }
1129             }
1130             rows[l] = l + nc*(slot);
1131           }
1132           ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1133         }
1134       }
1135     }
1136     ierr = PetscFree(values);CHKERRQ(ierr);
1137     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1138     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1139     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1140   }
1141   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 /* ---------------------------------------------------------------------------------*/
1146 
1147 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J)
1148 {
1149   PetscErrorCode         ierr;
1150   DM_DA                  *dd = (DM_DA*)da->data;
1151   PetscInt               xs,nx,i,j,gxs,gnx,row,k,l;
1152   PetscInt               m,dim,s,*cols = NULL,nc,cnt,maxcnt = 0,*ocols;
1153   PetscInt               *ofill = dd->ofill,*dfill = dd->dfill;
1154   PetscScalar            *values;
1155   DMBoundaryType         bx;
1156   ISLocalToGlobalMapping ltog;
1157   PetscMPIInt            rank,size;
1158 
1159   PetscFunctionBegin;
1160   if (dd->bx == DM_BOUNDARY_PERIODIC) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"With fill provided not implemented with periodic boundary conditions");
1161   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)da),&rank);CHKERRQ(ierr);
1162   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRQ(ierr);
1163 
1164   /*
1165          nc - number of components per grid point
1166 
1167   */
1168   ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
1169   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
1170   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
1171 
1172   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1173   ierr = PetscCalloc2(nx*nc,&cols,nx*nc,&ocols);CHKERRQ(ierr);
1174 
1175   /*
1176         note should be smaller for first and last process with no periodic
1177         does not handle dfill
1178   */
1179   cnt = 0;
1180   /* coupling with process to the left */
1181   for (i=0; i<s; i++) {
1182     for (j=0; j<nc; j++) {
1183       ocols[cnt] = ((!rank) ? 0 : (s - i)*(ofill[j+1] - ofill[j]));
1184       cols[cnt]  = dfill[j+1] - dfill[j] + (s + i)*(ofill[j+1] - ofill[j]);
1185       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1186       cnt++;
1187     }
1188   }
1189   for (i=s; i<nx-s; i++) {
1190     for (j=0; j<nc; j++) {
1191       cols[cnt] = dfill[j+1] - dfill[j] + 2*s*(ofill[j+1] - ofill[j]);
1192       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1193       cnt++;
1194     }
1195   }
1196   /* coupling with process to the right */
1197   for (i=nx-s; i<nx; i++) {
1198     for (j=0; j<nc; j++) {
1199       ocols[cnt] = ((rank == (size-1)) ? 0 : (i - nx + s + 1)*(ofill[j+1] - ofill[j]));
1200       cols[cnt]  = dfill[j+1] - dfill[j] + (s + nx - i - 1)*(ofill[j+1] - ofill[j]);
1201       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1202       cnt++;
1203     }
1204   }
1205 
1206   ierr = MatSeqAIJSetPreallocation(J,0,cols);CHKERRQ(ierr);
1207   ierr = MatMPIAIJSetPreallocation(J,0,cols,0,ocols);CHKERRQ(ierr);
1208   ierr = PetscFree2(cols,ocols);CHKERRQ(ierr);
1209 
1210   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1211   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1212 
1213   /*
1214     For each node in the grid: we get the neighbors in the local (on processor ordering
1215     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1216     PETSc ordering.
1217   */
1218   if (!da->prealloc_only) {
1219     ierr = PetscCalloc2(maxcnt,&values,maxcnt,&cols);CHKERRQ(ierr);
1220 
1221     row = xs*nc;
1222     /* coupling with process to the left */
1223     for (i=xs; i<xs+s; i++) {
1224       for (j=0; j<nc; j++) {
1225         cnt = 0;
1226         if (rank) {
1227           for (l=0; l<s; l++) {
1228             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1229           }
1230         }
1231         if (dfill) {
1232           for (k=dfill[j]; k<dfill[j+1]; k++) {
1233             cols[cnt++] = i*nc + dfill[k];
1234           }
1235         } else {
1236           for (k=0; k<nc; k++) {
1237             cols[cnt++] = i*nc + k;
1238           }
1239         }
1240         for (l=0; l<s; l++) {
1241           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1242         }
1243         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1244         row++;
1245       }
1246     }
1247     for (i=xs+s; i<xs+nx-s; i++) {
1248       for (j=0; j<nc; j++) {
1249         cnt = 0;
1250         for (l=0; l<s; l++) {
1251           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1252         }
1253         if (dfill) {
1254           for (k=dfill[j]; k<dfill[j+1]; k++) {
1255             cols[cnt++] = i*nc + dfill[k];
1256           }
1257         } else {
1258           for (k=0; k<nc; k++) {
1259             cols[cnt++] = i*nc + k;
1260           }
1261         }
1262         for (l=0; l<s; l++) {
1263           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1264         }
1265         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1266         row++;
1267       }
1268     }
1269     /* coupling with process to the right */
1270     for (i=xs+nx-s; i<xs+nx; i++) {
1271       for (j=0; j<nc; j++) {
1272         cnt = 0;
1273         for (l=0; l<s; l++) {
1274           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1275         }
1276         if (dfill) {
1277           for (k=dfill[j]; k<dfill[j+1]; k++) {
1278             cols[cnt++] = i*nc + dfill[k];
1279           }
1280         } else {
1281           for (k=0; k<nc; k++) {
1282             cols[cnt++] = i*nc + k;
1283           }
1284         }
1285         if (rank < size-1) {
1286           for (l=0; l<s; l++) {
1287             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1288           }
1289         }
1290         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1291         row++;
1292       }
1293     }
1294     ierr = PetscFree2(values,cols);CHKERRQ(ierr);
1295     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1296     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1297     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1298   }
1299   PetscFunctionReturn(0);
1300 }
1301 
1302 /* ---------------------------------------------------------------------------------*/
1303 
1304 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J)
1305 {
1306   PetscErrorCode         ierr;
1307   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1308   PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
1309   PetscInt               istart,iend;
1310   PetscScalar            *values;
1311   DMBoundaryType         bx;
1312   ISLocalToGlobalMapping ltog;
1313 
1314   PetscFunctionBegin;
1315   /*
1316          nc - number of components per grid point
1317          col - number of colors needed in one direction for single component problem
1318 
1319   */
1320   ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
1321   col  = 2*s + 1;
1322 
1323   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
1324   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
1325 
1326   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1327   ierr = MatSeqAIJSetPreallocation(J,col*nc,0);CHKERRQ(ierr);
1328   ierr = MatMPIAIJSetPreallocation(J,col*nc,0,col*nc,0);CHKERRQ(ierr);
1329 
1330   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1331   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1332 
1333   /*
1334     For each node in the grid: we get the neighbors in the local (on processor ordering
1335     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1336     PETSc ordering.
1337   */
1338   if (!da->prealloc_only) {
1339     ierr = PetscMalloc2(nc,&rows,col*nc*nc,&cols);CHKERRQ(ierr);
1340     ierr = PetscCalloc1(col*nc*nc,&values);CHKERRQ(ierr);
1341     for (i=xs; i<xs+nx; i++) {
1342       istart = PetscMax(-s,gxs - i);
1343       iend   = PetscMin(s,gxs + gnx - i - 1);
1344       slot   = i - gxs;
1345 
1346       cnt = 0;
1347       for (l=0; l<nc; l++) {
1348         for (i1=istart; i1<iend+1; i1++) {
1349           cols[cnt++] = l + nc*(slot + i1);
1350         }
1351         rows[l] = l + nc*(slot);
1352       }
1353       ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1354     }
1355     ierr = PetscFree(values);CHKERRQ(ierr);
1356     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1357     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1358     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1359     ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1360   }
1361   PetscFunctionReturn(0);
1362 }
1363 
1364 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
1365 {
1366   PetscErrorCode         ierr;
1367   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1368   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1369   PetscInt               istart,iend,jstart,jend,ii,jj;
1370   MPI_Comm               comm;
1371   PetscScalar            *values;
1372   DMBoundaryType         bx,by;
1373   DMDAStencilType        st;
1374   ISLocalToGlobalMapping ltog;
1375 
1376   PetscFunctionBegin;
1377   /*
1378      nc - number of components per grid point
1379      col - number of colors needed in one direction for single component problem
1380   */
1381   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1382   col  = 2*s + 1;
1383 
1384   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1385   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1386   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1387 
1388   ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr);
1389 
1390   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1391 
1392   /* determine the matrix preallocation information */
1393   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1394   for (i=xs; i<xs+nx; i++) {
1395     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1396     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1397     for (j=ys; j<ys+ny; j++) {
1398       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1399       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1400       slot   = i - gxs + gnx*(j - gys);
1401 
1402       /* Find block columns in block row */
1403       cnt = 0;
1404       for (ii=istart; ii<iend+1; ii++) {
1405         for (jj=jstart; jj<jend+1; jj++) {
1406           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1407             cols[cnt++] = slot + ii + gnx*jj;
1408           }
1409         }
1410       }
1411       ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1412     }
1413   }
1414   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1415   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1416   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1417 
1418   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1419 
1420   /*
1421     For each node in the grid: we get the neighbors in the local (on processor ordering
1422     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1423     PETSc ordering.
1424   */
1425   if (!da->prealloc_only) {
1426     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
1427     for (i=xs; i<xs+nx; i++) {
1428       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1429       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1430       for (j=ys; j<ys+ny; j++) {
1431         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1432         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1433         slot = i - gxs + gnx*(j - gys);
1434         cnt  = 0;
1435         for (ii=istart; ii<iend+1; ii++) {
1436           for (jj=jstart; jj<jend+1; jj++) {
1437             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1438               cols[cnt++] = slot + ii + gnx*jj;
1439             }
1440           }
1441         }
1442         ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1443       }
1444     }
1445     ierr = PetscFree(values);CHKERRQ(ierr);
1446     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1447     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1448     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1449   }
1450   ierr = PetscFree(cols);CHKERRQ(ierr);
1451   PetscFunctionReturn(0);
1452 }
1453 
1454 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
1455 {
1456   PetscErrorCode         ierr;
1457   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1458   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1459   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1460   MPI_Comm               comm;
1461   PetscScalar            *values;
1462   DMBoundaryType         bx,by,bz;
1463   DMDAStencilType        st;
1464   ISLocalToGlobalMapping ltog;
1465 
1466   PetscFunctionBegin;
1467   /*
1468          nc - number of components per grid point
1469          col - number of colors needed in one direction for single component problem
1470 
1471   */
1472   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1473   col  = 2*s + 1;
1474 
1475   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1476   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1477   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1478 
1479   ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr);
1480 
1481   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1482 
1483   /* determine the matrix preallocation information */
1484   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1485   for (i=xs; i<xs+nx; i++) {
1486     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1487     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1488     for (j=ys; j<ys+ny; j++) {
1489       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1490       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1491       for (k=zs; k<zs+nz; k++) {
1492         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1493         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1494 
1495         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1496 
1497         /* Find block columns in block row */
1498         cnt = 0;
1499         for (ii=istart; ii<iend+1; ii++) {
1500           for (jj=jstart; jj<jend+1; jj++) {
1501             for (kk=kstart; kk<kend+1; kk++) {
1502               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1503                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1504               }
1505             }
1506           }
1507         }
1508         ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1509       }
1510     }
1511   }
1512   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1513   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1514   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1515 
1516   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1517 
1518   /*
1519     For each node in the grid: we get the neighbors in the local (on processor ordering
1520     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1521     PETSc ordering.
1522   */
1523   if (!da->prealloc_only) {
1524     ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr);
1525     for (i=xs; i<xs+nx; i++) {
1526       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1527       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1528       for (j=ys; j<ys+ny; j++) {
1529         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1530         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1531         for (k=zs; k<zs+nz; k++) {
1532           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1533           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1534 
1535           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1536 
1537           cnt = 0;
1538           for (ii=istart; ii<iend+1; ii++) {
1539             for (jj=jstart; jj<jend+1; jj++) {
1540               for (kk=kstart; kk<kend+1; kk++) {
1541                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1542                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1543                 }
1544               }
1545             }
1546           }
1547           ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1548         }
1549       }
1550     }
1551     ierr = PetscFree(values);CHKERRQ(ierr);
1552     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1553     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1554     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1555   }
1556   ierr = PetscFree(cols);CHKERRQ(ierr);
1557   PetscFunctionReturn(0);
1558 }
1559 
1560 /*
1561   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
1562   identify in the local ordering with periodic domain.
1563 */
1564 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[])
1565 {
1566   PetscErrorCode ierr;
1567   PetscInt       i,n;
1568 
1569   PetscFunctionBegin;
1570   ierr = ISLocalToGlobalMappingApplyBlock(ltog,1,row,row);CHKERRQ(ierr);
1571   ierr = ISLocalToGlobalMappingApplyBlock(ltog,*cnt,col,col);CHKERRQ(ierr);
1572   for (i=0,n=0; i<*cnt; i++) {
1573     if (col[i] >= *row) col[n++] = col[i];
1574   }
1575   *cnt = n;
1576   PetscFunctionReturn(0);
1577 }
1578 
1579 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J)
1580 {
1581   PetscErrorCode         ierr;
1582   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1583   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1584   PetscInt               istart,iend,jstart,jend,ii,jj;
1585   MPI_Comm               comm;
1586   PetscScalar            *values;
1587   DMBoundaryType         bx,by;
1588   DMDAStencilType        st;
1589   ISLocalToGlobalMapping ltog;
1590 
1591   PetscFunctionBegin;
1592   /*
1593      nc - number of components per grid point
1594      col - number of colors needed in one direction for single component problem
1595   */
1596   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1597   col  = 2*s + 1;
1598 
1599   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1600   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1601   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1602 
1603   ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr);
1604 
1605   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1606 
1607   /* determine the matrix preallocation information */
1608   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1609   for (i=xs; i<xs+nx; i++) {
1610     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1611     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1612     for (j=ys; j<ys+ny; j++) {
1613       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1614       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1615       slot   = i - gxs + gnx*(j - gys);
1616 
1617       /* Find block columns in block row */
1618       cnt = 0;
1619       for (ii=istart; ii<iend+1; ii++) {
1620         for (jj=jstart; jj<jend+1; jj++) {
1621           if (st == DMDA_STENCIL_BOX || !ii || !jj) {
1622             cols[cnt++] = slot + ii + gnx*jj;
1623           }
1624         }
1625       }
1626       ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
1627       ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
1628     }
1629   }
1630   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1631   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1632   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1633 
1634   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1635 
1636   /*
1637     For each node in the grid: we get the neighbors in the local (on processor ordering
1638     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1639     PETSc ordering.
1640   */
1641   if (!da->prealloc_only) {
1642     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
1643     for (i=xs; i<xs+nx; i++) {
1644       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1645       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1646       for (j=ys; j<ys+ny; j++) {
1647         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1648         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1649         slot   = i - gxs + gnx*(j - gys);
1650 
1651         /* Find block columns in block row */
1652         cnt = 0;
1653         for (ii=istart; ii<iend+1; ii++) {
1654           for (jj=jstart; jj<jend+1; jj++) {
1655             if (st == DMDA_STENCIL_BOX || !ii || !jj) {
1656               cols[cnt++] = slot + ii + gnx*jj;
1657             }
1658           }
1659         }
1660         ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
1661         ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1662       }
1663     }
1664     ierr = PetscFree(values);CHKERRQ(ierr);
1665     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1666     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1667     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1668   }
1669   ierr = PetscFree(cols);CHKERRQ(ierr);
1670   PetscFunctionReturn(0);
1671 }
1672 
1673 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J)
1674 {
1675   PetscErrorCode         ierr;
1676   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1677   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1678   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1679   MPI_Comm               comm;
1680   PetscScalar            *values;
1681   DMBoundaryType         bx,by,bz;
1682   DMDAStencilType        st;
1683   ISLocalToGlobalMapping ltog;
1684 
1685   PetscFunctionBegin;
1686   /*
1687      nc - number of components per grid point
1688      col - number of colors needed in one direction for single component problem
1689   */
1690   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1691   col  = 2*s + 1;
1692 
1693   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1694   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1695   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1696 
1697   /* create the matrix */
1698   ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr);
1699 
1700   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1701 
1702   /* determine the matrix preallocation information */
1703   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1704   for (i=xs; i<xs+nx; i++) {
1705     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1706     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1707     for (j=ys; j<ys+ny; j++) {
1708       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1709       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1710       for (k=zs; k<zs+nz; k++) {
1711         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1712         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1713 
1714         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1715 
1716         /* Find block columns in block row */
1717         cnt = 0;
1718         for (ii=istart; ii<iend+1; ii++) {
1719           for (jj=jstart; jj<jend+1; jj++) {
1720             for (kk=kstart; kk<kend+1; kk++) {
1721               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
1722                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1723               }
1724             }
1725           }
1726         }
1727         ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
1728         ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
1729       }
1730     }
1731   }
1732   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1733   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1734   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1735 
1736   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1737 
1738   /*
1739     For each node in the grid: we get the neighbors in the local (on processor ordering
1740     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1741     PETSc ordering.
1742   */
1743   if (!da->prealloc_only) {
1744     ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr);
1745     for (i=xs; i<xs+nx; i++) {
1746       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1747       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1748       for (j=ys; j<ys+ny; j++) {
1749         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1750         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1751         for (k=zs; k<zs+nz; k++) {
1752           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1753           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1754 
1755           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1756 
1757           cnt = 0;
1758           for (ii=istart; ii<iend+1; ii++) {
1759             for (jj=jstart; jj<jend+1; jj++) {
1760               for (kk=kstart; kk<kend+1; kk++) {
1761                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
1762                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1763                 }
1764               }
1765             }
1766           }
1767           ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
1768           ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1769         }
1770       }
1771     }
1772     ierr = PetscFree(values);CHKERRQ(ierr);
1773     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1774     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1775     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1776   }
1777   ierr = PetscFree(cols);CHKERRQ(ierr);
1778   PetscFunctionReturn(0);
1779 }
1780 
1781 /* ---------------------------------------------------------------------------------*/
1782 
1783 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J)
1784 {
1785   PetscErrorCode         ierr;
1786   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1787   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt, maxcnt = 0,l,p,*dnz,*onz;
1788   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
1789   DM_DA                  *dd = (DM_DA*)da->data;
1790   PetscInt               ifill_col,*dfill = dd->dfill,*ofill = dd->ofill;
1791   MPI_Comm               comm;
1792   PetscScalar            *values;
1793   DMBoundaryType         bx,by,bz;
1794   ISLocalToGlobalMapping ltog;
1795   DMDAStencilType        st;
1796   PetscBool              removedups = PETSC_FALSE;
1797 
1798   PetscFunctionBegin;
1799   /*
1800          nc - number of components per grid point
1801          col - number of colors needed in one direction for single component problem
1802 
1803   */
1804   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1805   col  = 2*s + 1;
1806   if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
1807                  by 2*stencil_width + 1\n");
1808   if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
1809                  by 2*stencil_width + 1\n");
1810   if (bz == DM_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
1811                  by 2*stencil_width + 1\n");
1812 
1813   /*
1814        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1815        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1816   */
1817   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1818   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1819   if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;
1820 
1821   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1822   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1823   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1824 
1825   ierr = PetscMalloc1(col*col*col*nc,&cols);CHKERRQ(ierr);
1826   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1827 
1828   /* determine the matrix preallocation information */
1829   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1830 
1831   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1832   for (i=xs; i<xs+nx; i++) {
1833     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1834     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1835     for (j=ys; j<ys+ny; j++) {
1836       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1837       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1838       for (k=zs; k<zs+nz; k++) {
1839         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1840         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1841 
1842         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1843 
1844         for (l=0; l<nc; l++) {
1845           cnt = 0;
1846           for (ii=istart; ii<iend+1; ii++) {
1847             for (jj=jstart; jj<jend+1; jj++) {
1848               for (kk=kstart; kk<kend+1; kk++) {
1849                 if (ii || jj || kk) {
1850                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1851                     for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1852                   }
1853                 } else {
1854                   if (dfill) {
1855                     for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1856                   } else {
1857                     for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1858                   }
1859                 }
1860               }
1861             }
1862           }
1863           row  = l + nc*(slot);
1864           maxcnt = PetscMax(maxcnt,cnt);
1865           if (removedups) {
1866             ierr = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1867           } else {
1868             ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1869           }
1870         }
1871       }
1872     }
1873   }
1874   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1875   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1876   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1877   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1878 
1879   /*
1880     For each node in the grid: we get the neighbors in the local (on processor ordering
1881     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1882     PETSc ordering.
1883   */
1884   if (!da->prealloc_only) {
1885     ierr = PetscCalloc1(maxcnt,&values);CHKERRQ(ierr);
1886     for (i=xs; i<xs+nx; i++) {
1887       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1888       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1889       for (j=ys; j<ys+ny; j++) {
1890         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1891         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1892         for (k=zs; k<zs+nz; k++) {
1893           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1894           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1895 
1896           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1897 
1898           for (l=0; l<nc; l++) {
1899             cnt = 0;
1900             for (ii=istart; ii<iend+1; ii++) {
1901               for (jj=jstart; jj<jend+1; jj++) {
1902                 for (kk=kstart; kk<kend+1; kk++) {
1903                   if (ii || jj || kk) {
1904                     if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1905                       for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1906                     }
1907                   } else {
1908                     if (dfill) {
1909                       for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1910                     } else {
1911                       for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1912                     }
1913                   }
1914                 }
1915               }
1916             }
1917             row  = l + nc*(slot);
1918             ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1919           }
1920         }
1921       }
1922     }
1923     ierr = PetscFree(values);CHKERRQ(ierr);
1924     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1925     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1926     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1927   }
1928   ierr = PetscFree(cols);CHKERRQ(ierr);
1929   PetscFunctionReturn(0);
1930 }
1931