xref: /petsc/src/dm/impls/da/fdda.c (revision 390e1bf27627d887df99a9f4d0d0ad68037f55ec)
1 
2 #include <petsc/private/dmdaimpl.h> /*I      "petscdmda.h"     I*/
3 #include <petscmat.h>
4 
5 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring*);
6 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring*);
7 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring*);
8 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring*);
9 
10 /*
11    For ghost i that may be negative or greater than the upper bound this
12   maps it into the 0:m-1 range using periodicity
13 */
14 #define SetInRange(i,m) ((i < 0) ? m+i : ((i >= m) ? i-m : i))
15 
16 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill,PetscInt w,PetscInt **rfill)
17 {
18   PetscErrorCode ierr;
19   PetscInt       i,j,nz,*fill;
20 
21   PetscFunctionBegin;
22   if (!dfill) PetscFunctionReturn(0);
23 
24   /* count number nonzeros */
25   nz = 0;
26   for (i=0; i<w; i++) {
27     for (j=0; j<w; j++) {
28       if (dfill[w*i+j]) nz++;
29     }
30   }
31   ierr = PetscMalloc1(nz + w + 1,&fill);CHKERRQ(ierr);
32   /* construct modified CSR storage of nonzero structure */
33   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
34    so fill[1] - fill[0] gives number of nonzeros in first row etc */
35   nz = w + 1;
36   for (i=0; i<w; i++) {
37     fill[i] = nz;
38     for (j=0; j<w; j++) {
39       if (dfill[w*i+j]) {
40         fill[nz] = j;
41         nz++;
42       }
43     }
44   }
45   fill[w] = nz;
46 
47   *rfill = fill;
48   PetscFunctionReturn(0);
49 }
50 
51 /*@
52     DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
53     of the matrix returned by DMCreateMatrix().
54 
55     Logically Collective on DMDA
56 
57     Input Parameter:
58 +   da - the distributed array
59 .   dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
60 -   ofill - the fill pattern in the off-diagonal blocks
61 
62 
63     Level: developer
64 
65     Notes: This only makes sense when you are doing multicomponent problems but using the
66        MPIAIJ matrix format
67 
68            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
69        representing coupling and 0 entries for missing coupling. For example
70 $             dfill[9] = {1, 0, 0,
71 $                         1, 1, 0,
72 $                         0, 1, 1}
73        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
74        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
75        diagonal block).
76 
77      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
78      can be represented in the dfill, ofill format
79 
80    Contributed by Glenn Hammond
81 
82 .seealso DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()
83 
84 @*/
85 PetscErrorCode  DMDASetBlockFills(DM da,const PetscInt *dfill,const PetscInt *ofill)
86 {
87   DM_DA          *dd = (DM_DA*)da->data;
88   PetscErrorCode ierr;
89   PetscInt       i,k,cnt = 1;
90 
91   PetscFunctionBegin;
92   ierr = DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);CHKERRQ(ierr);
93   ierr = DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);CHKERRQ(ierr);
94 
95   /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of
96    columns to the left with any nonzeros in them plus 1 */
97   ierr = PetscCalloc1(dd->w,&dd->ofillcols);CHKERRQ(ierr);
98   for (i=0; i<dd->w; i++) {
99     for (k=dd->ofill[i]; k<dd->ofill[i+1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
100   }
101   for (i=0; i<dd->w; i++) {
102     if (dd->ofillcols[i]) {
103       dd->ofillcols[i] = cnt++;
104     }
105   }
106   PetscFunctionReturn(0);
107 }
108 
109 
110 PetscErrorCode  DMCreateColoring_DA(DM da,ISColoringType ctype,ISColoring *coloring)
111 {
112   PetscErrorCode   ierr;
113   PetscInt         dim,m,n,p,nc;
114   DMBoundaryType bx,by,bz;
115   MPI_Comm         comm;
116   PetscMPIInt      size;
117   PetscBool        isBAIJ;
118   DM_DA            *dd = (DM_DA*)da->data;
119 
120   PetscFunctionBegin;
121   /*
122                                   m
123           ------------------------------------------------------
124          |                                                     |
125          |                                                     |
126          |               ----------------------                |
127          |               |                    |                |
128       n  |           yn  |                    |                |
129          |               |                    |                |
130          |               .---------------------                |
131          |             (xs,ys)     xn                          |
132          |            .                                        |
133          |         (gxs,gys)                                   |
134          |                                                     |
135           -----------------------------------------------------
136   */
137 
138   /*
139          nc - number of components per grid point
140          col - number of colors needed in one direction for single component problem
141 
142   */
143   ierr = DMDAGetInfo(da,&dim,0,0,0,&m,&n,&p,&nc,0,&bx,&by,&bz,0);CHKERRQ(ierr);
144 
145   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
146   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
147   if (ctype == IS_COLORING_LOCAL) {
148     if (size == 1) {
149       ctype = IS_COLORING_GLOBAL;
150     } else if (dim > 1) {
151       if ((m==1 && bx == DM_BOUNDARY_PERIODIC) || (n==1 && by == DM_BOUNDARY_PERIODIC) || (p==1 && bz == DM_BOUNDARY_PERIODIC)) {
152         SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain  on the same process");
153       }
154     }
155   }
156 
157   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
158      matrices is for the blocks, not the individual matrix elements  */
159   ierr = PetscStrcmp(da->mattype,MATBAIJ,&isBAIJ);CHKERRQ(ierr);
160   if (!isBAIJ) {ierr = PetscStrcmp(da->mattype,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);}
161   if (!isBAIJ) {ierr = PetscStrcmp(da->mattype,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);}
162   if (isBAIJ) {
163     dd->w  = 1;
164     dd->xs = dd->xs/nc;
165     dd->xe = dd->xe/nc;
166     dd->Xs = dd->Xs/nc;
167     dd->Xe = dd->Xe/nc;
168   }
169 
170   /*
171      We do not provide a getcoloring function in the DMDA operations because
172    the basic DMDA does not know about matrices. We think of DMDA as being more
173    more low-level then matrices.
174   */
175   if (dim == 1) {
176     ierr = DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
177   } else if (dim == 2) {
178     ierr =  DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
179   } else if (dim == 3) {
180     ierr =  DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
181   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
182   if (isBAIJ) {
183     dd->w  = nc;
184     dd->xs = dd->xs*nc;
185     dd->xe = dd->xe*nc;
186     dd->Xs = dd->Xs*nc;
187     dd->Xe = dd->Xe*nc;
188   }
189   PetscFunctionReturn(0);
190 }
191 
192 /* ---------------------------------------------------------------------------------*/
193 
194 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
195 {
196   PetscErrorCode   ierr;
197   PetscInt         xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
198   PetscInt         ncolors;
199   MPI_Comm         comm;
200   DMBoundaryType bx,by;
201   DMDAStencilType  st;
202   ISColoringValue  *colors;
203   DM_DA            *dd = (DM_DA*)da->data;
204 
205   PetscFunctionBegin;
206   /*
207          nc - number of components per grid point
208          col - number of colors needed in one direction for single component problem
209 
210   */
211   ierr = DMDAGetInfo(da,&dim,&m,&n,0,&M,&N,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
212   col  = 2*s + 1;
213   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
214   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
215   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
216 
217   /* special case as taught to us by Paul Hovland */
218   if (st == DMDA_STENCIL_STAR && s == 1) {
219     ierr = DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
220   } else {
221 
222     if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X (%d) is divisible\n\
223                                                             by 2*stencil_width + 1 (%d)\n", m, col);
224     if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y (%d) is divisible\n\
225                                                             by 2*stencil_width + 1 (%d)\n", n, col);
226     if (ctype == IS_COLORING_GLOBAL) {
227       if (!dd->localcoloring) {
228         ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr);
229         ii   = 0;
230         for (j=ys; j<ys+ny; j++) {
231           for (i=xs; i<xs+nx; i++) {
232             for (k=0; k<nc; k++) {
233               colors[ii++] = k + nc*((i % col) + col*(j % col));
234             }
235           }
236         }
237         ncolors = nc + nc*(col-1 + col*(col-1));
238         ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
239       }
240       *coloring = dd->localcoloring;
241     } else if (ctype == IS_COLORING_LOCAL) {
242       if (!dd->ghostedcoloring) {
243         ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr);
244         ii   = 0;
245         for (j=gys; j<gys+gny; j++) {
246           for (i=gxs; i<gxs+gnx; i++) {
247             for (k=0; k<nc; k++) {
248               /* the complicated stuff is to handle periodic boundaries */
249               colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
250             }
251           }
252         }
253         ncolors = nc + nc*(col - 1 + col*(col-1));
254         ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
255         /* PetscIntView(ncolors,(PetscInt*)colors,0); */
256 
257         ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
258       }
259       *coloring = dd->ghostedcoloring;
260     } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
261   }
262   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
263   PetscFunctionReturn(0);
264 }
265 
266 /* ---------------------------------------------------------------------------------*/
267 
268 PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
269 {
270   PetscErrorCode   ierr;
271   PetscInt         xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
272   PetscInt         ncolors;
273   MPI_Comm         comm;
274   DMBoundaryType bx,by,bz;
275   DMDAStencilType  st;
276   ISColoringValue  *colors;
277   DM_DA            *dd = (DM_DA*)da->data;
278 
279   PetscFunctionBegin;
280   /*
281          nc - number of components per grid point
282          col - number of colors needed in one direction for single component problem
283 
284   */
285   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
286   col  = 2*s + 1;
287   if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
288                                                          by 2*stencil_width + 1\n");
289   if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
290                                                          by 2*stencil_width + 1\n");
291   if (bz == DM_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
292                                                          by 2*stencil_width + 1\n");
293 
294   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
295   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
296   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
297 
298   /* create the coloring */
299   if (ctype == IS_COLORING_GLOBAL) {
300     if (!dd->localcoloring) {
301       ierr = PetscMalloc1(nc*nx*ny*nz,&colors);CHKERRQ(ierr);
302       ii   = 0;
303       for (k=zs; k<zs+nz; k++) {
304         for (j=ys; j<ys+ny; j++) {
305           for (i=xs; i<xs+nx; i++) {
306             for (l=0; l<nc; l++) {
307               colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
308             }
309           }
310         }
311       }
312       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
313       ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
314     }
315     *coloring = dd->localcoloring;
316   } else if (ctype == IS_COLORING_LOCAL) {
317     if (!dd->ghostedcoloring) {
318       ierr = PetscMalloc1(nc*gnx*gny*gnz,&colors);CHKERRQ(ierr);
319       ii   = 0;
320       for (k=gzs; k<gzs+gnz; k++) {
321         for (j=gys; j<gys+gny; j++) {
322           for (i=gxs; i<gxs+gnx; i++) {
323             for (l=0; l<nc; l++) {
324               /* the complicated stuff is to handle periodic boundaries */
325               colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
326             }
327           }
328         }
329       }
330       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
331       ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
332       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
333     }
334     *coloring = dd->ghostedcoloring;
335   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
336   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
337   PetscFunctionReturn(0);
338 }
339 
340 /* ---------------------------------------------------------------------------------*/
341 
342 PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
343 {
344   PetscErrorCode   ierr;
345   PetscInt         xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
346   PetscInt         ncolors;
347   MPI_Comm         comm;
348   DMBoundaryType bx;
349   ISColoringValue  *colors;
350   DM_DA            *dd = (DM_DA*)da->data;
351 
352   PetscFunctionBegin;
353   /*
354          nc - number of components per grid point
355          col - number of colors needed in one direction for single component problem
356 
357   */
358   ierr = DMDAGetInfo(da,&dim,&m,0,0,&M,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
359   col  = 2*s + 1;
360 
361   if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points %d is divisible\n\
362                                                           by 2*stencil_width + 1 %d\n",(int)m,(int)col);
363 
364   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
365   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
366   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
367 
368   /* create the coloring */
369   if (ctype == IS_COLORING_GLOBAL) {
370     if (!dd->localcoloring) {
371       ierr = PetscMalloc1(nc*nx,&colors);CHKERRQ(ierr);
372       if (dd->ofillcols) {
373         PetscInt tc = 0;
374         for (i=0; i<nc; i++) tc += (PetscInt) (dd->ofillcols[i] > 0);
375         i1 = 0;
376         for (i=xs; i<xs+nx; i++) {
377           for (l=0; l<nc; l++) {
378             if (dd->ofillcols[l] && (i % col)) {
379               colors[i1++] =  nc - 1 + tc*((i % col) - 1) + dd->ofillcols[l];
380             } else {
381               colors[i1++] = l;
382             }
383           }
384         }
385         ncolors = nc + 2*s*tc;
386       } else {
387         i1 = 0;
388         for (i=xs; i<xs+nx; i++) {
389           for (l=0; l<nc; l++) {
390             colors[i1++] = l + nc*(i % col);
391           }
392         }
393         ncolors = nc + nc*(col-1);
394       }
395       ierr = ISColoringCreate(comm,ncolors,nc*nx,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
396     }
397     *coloring = dd->localcoloring;
398   } else if (ctype == IS_COLORING_LOCAL) {
399     if (!dd->ghostedcoloring) {
400       ierr = PetscMalloc1(nc*gnx,&colors);CHKERRQ(ierr);
401       i1   = 0;
402       for (i=gxs; i<gxs+gnx; i++) {
403         for (l=0; l<nc; l++) {
404           /* the complicated stuff is to handle periodic boundaries */
405           colors[i1++] = l + nc*(SetInRange(i,m) % col);
406         }
407       }
408       ncolors = nc + nc*(col-1);
409       ierr    = ISColoringCreate(comm,ncolors,nc*gnx,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
410       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
411     }
412     *coloring = dd->ghostedcoloring;
413   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
414   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
415   PetscFunctionReturn(0);
416 }
417 
418 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
419 {
420   PetscErrorCode   ierr;
421   PetscInt         xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
422   PetscInt         ncolors;
423   MPI_Comm         comm;
424   DMBoundaryType bx,by;
425   ISColoringValue  *colors;
426   DM_DA            *dd = (DM_DA*)da->data;
427 
428   PetscFunctionBegin;
429   /*
430          nc - number of components per grid point
431          col - number of colors needed in one direction for single component problem
432 
433   */
434   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,0);CHKERRQ(ierr);
435   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
436   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
437   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
438 
439   if (bx == DM_BOUNDARY_PERIODIC && (m % 5)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible by 5\n");
440   if (by == DM_BOUNDARY_PERIODIC && (n % 5)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible by 5\n");
441 
442   /* create the coloring */
443   if (ctype == IS_COLORING_GLOBAL) {
444     if (!dd->localcoloring) {
445       ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr);
446       ii   = 0;
447       for (j=ys; j<ys+ny; j++) {
448         for (i=xs; i<xs+nx; i++) {
449           for (k=0; k<nc; k++) {
450             colors[ii++] = k + nc*((3*j+i) % 5);
451           }
452         }
453       }
454       ncolors = 5*nc;
455       ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
456     }
457     *coloring = dd->localcoloring;
458   } else if (ctype == IS_COLORING_LOCAL) {
459     if (!dd->ghostedcoloring) {
460       ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr);
461       ii = 0;
462       for (j=gys; j<gys+gny; j++) {
463         for (i=gxs; i<gxs+gnx; i++) {
464           for (k=0; k<nc; k++) {
465             colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
466           }
467         }
468       }
469       ncolors = 5*nc;
470       ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
471       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
472     }
473     *coloring = dd->ghostedcoloring;
474   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
475   PetscFunctionReturn(0);
476 }
477 
478 /* =========================================================================== */
479 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat);
480 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM,Mat);
481 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat);
482 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat);
483 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat);
484 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat);
485 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat);
486 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat);
487 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat);
488 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat);
489 
490 /*@C
491    MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix
492 
493    Logically Collective on Mat
494 
495    Input Parameters:
496 +  mat - the matrix
497 -  da - the da
498 
499    Level: intermediate
500 
501 @*/
502 PetscErrorCode MatSetupDM(Mat mat,DM da)
503 {
504   PetscErrorCode ierr;
505 
506   PetscFunctionBegin;
507   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
508   PetscValidHeaderSpecific(da,DM_CLASSID,1);
509   ierr = PetscTryMethod(mat,"MatSetupDM_C",(Mat,DM),(mat,da));CHKERRQ(ierr);
510   PetscFunctionReturn(0);
511 }
512 
513 PetscErrorCode  MatView_MPI_DA(Mat A,PetscViewer viewer)
514 {
515   DM                da;
516   PetscErrorCode    ierr;
517   const char        *prefix;
518   Mat               Anatural;
519   AO                ao;
520   PetscInt          rstart,rend,*petsc,i;
521   IS                is;
522   MPI_Comm          comm;
523   PetscViewerFormat format;
524 
525   PetscFunctionBegin;
526   /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
527   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
528   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0);
529 
530   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
531   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
532   if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
533 
534   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
535   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
536   ierr = PetscMalloc1(rend-rstart,&petsc);CHKERRQ(ierr);
537   for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
538   ierr = AOApplicationToPetsc(ao,rend-rstart,petsc);CHKERRQ(ierr);
539   ierr = ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
540 
541   /* call viewer on natural ordering */
542   ierr = MatGetSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);CHKERRQ(ierr);
543   ierr = ISDestroy(&is);CHKERRQ(ierr);
544   ierr = PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);CHKERRQ(ierr);
545   ierr = PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);CHKERRQ(ierr);
546   ierr = PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);CHKERRQ(ierr);
547   ierr = MatView(Anatural,viewer);CHKERRQ(ierr);
548   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
549   PetscFunctionReturn(0);
550 }
551 
552 PetscErrorCode  MatLoad_MPI_DA(Mat A,PetscViewer viewer)
553 {
554   DM             da;
555   PetscErrorCode ierr;
556   Mat            Anatural,Aapp;
557   AO             ao;
558   PetscInt       rstart,rend,*app,i,m,n,M,N;
559   IS             is;
560   MPI_Comm       comm;
561 
562   PetscFunctionBegin;
563   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
564   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
565   if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
566 
567   /* Load the matrix in natural ordering */
568   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Anatural);CHKERRQ(ierr);
569   ierr = MatSetType(Anatural,((PetscObject)A)->type_name);CHKERRQ(ierr);
570   ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr);
571   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
572   ierr = MatSetSizes(Anatural,m,n,M,N);CHKERRQ(ierr);
573   ierr = MatLoad(Anatural,viewer);CHKERRQ(ierr);
574 
575   /* Map natural ordering to application ordering and create IS */
576   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
577   ierr = MatGetOwnershipRange(Anatural,&rstart,&rend);CHKERRQ(ierr);
578   ierr = PetscMalloc1(rend-rstart,&app);CHKERRQ(ierr);
579   for (i=rstart; i<rend; i++) app[i-rstart] = i;
580   ierr = AOPetscToApplication(ao,rend-rstart,app);CHKERRQ(ierr);
581   ierr = ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
582 
583   /* Do permutation and replace header */
584   ierr = MatGetSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);CHKERRQ(ierr);
585   ierr = MatHeaderReplace(A,&Aapp);CHKERRQ(ierr);
586   ierr = ISDestroy(&is);CHKERRQ(ierr);
587   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
588   PetscFunctionReturn(0);
589 }
590 
591 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
592 {
593   PetscErrorCode ierr;
594   PetscInt       dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
595   Mat            A;
596   MPI_Comm       comm;
597   MatType        Atype;
598   PetscSection   section, sectionGlobal;
599   void           (*aij)(void)=NULL,(*baij)(void)=NULL,(*sbaij)(void)=NULL;
600   MatType        mtype;
601   PetscMPIInt    size;
602   DM_DA          *dd = (DM_DA*)da->data;
603 
604   PetscFunctionBegin;
605   ierr = MatInitializePackage();CHKERRQ(ierr);
606   mtype = da->mattype;
607 
608   ierr = DMGetDefaultSection(da, &section);CHKERRQ(ierr);
609   if (section) {
610     PetscInt  bs = -1;
611     PetscInt  localSize;
612     PetscBool isShell, isBlock, isSeqBlock, isMPIBlock, isSymBlock, isSymSeqBlock, isSymMPIBlock, isSymmetric;
613 
614     ierr = DMGetDefaultGlobalSection(da, &sectionGlobal);CHKERRQ(ierr);
615     ierr = PetscSectionGetConstrainedStorageSize(sectionGlobal, &localSize);CHKERRQ(ierr);
616     ierr = MatCreate(PetscObjectComm((PetscObject)da), J);CHKERRQ(ierr);
617     ierr = MatSetSizes(*J, localSize, localSize, PETSC_DETERMINE, PETSC_DETERMINE);CHKERRQ(ierr);
618     ierr = MatSetType(*J, mtype);CHKERRQ(ierr);
619     ierr = MatSetFromOptions(*J);CHKERRQ(ierr);
620     ierr = PetscStrcmp(mtype, MATSHELL, &isShell);CHKERRQ(ierr);
621     ierr = PetscStrcmp(mtype, MATBAIJ, &isBlock);CHKERRQ(ierr);
622     ierr = PetscStrcmp(mtype, MATSEQBAIJ, &isSeqBlock);CHKERRQ(ierr);
623     ierr = PetscStrcmp(mtype, MATMPIBAIJ, &isMPIBlock);CHKERRQ(ierr);
624     ierr = PetscStrcmp(mtype, MATSBAIJ, &isSymBlock);CHKERRQ(ierr);
625     ierr = PetscStrcmp(mtype, MATSEQSBAIJ, &isSymSeqBlock);CHKERRQ(ierr);
626     ierr = PetscStrcmp(mtype, MATMPISBAIJ, &isSymMPIBlock);CHKERRQ(ierr);
627     /* Check for symmetric storage */
628     isSymmetric = (PetscBool) (isSymBlock || isSymSeqBlock || isSymMPIBlock);
629     if (isSymmetric) {
630       ierr = MatSetOption(*J, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE);CHKERRQ(ierr);
631     }
632     if (!isShell) {
633       PetscInt *dnz, *onz, *dnzu, *onzu, bsLocal;
634 
635       if (bs < 0) {
636         if (isBlock || isSeqBlock || isMPIBlock || isSymBlock || isSymSeqBlock || isSymMPIBlock) {
637           PetscInt pStart, pEnd, p, dof;
638 
639           ierr = PetscSectionGetChart(sectionGlobal, &pStart, &pEnd);CHKERRQ(ierr);
640           for (p = pStart; p < pEnd; ++p) {
641             ierr = PetscSectionGetDof(sectionGlobal, p, &dof);CHKERRQ(ierr);
642             if (dof) {
643               bs = dof;
644               break;
645             }
646           }
647         } else {
648           bs = 1;
649         }
650         /* Must have same blocksize on all procs (some might have no points) */
651         bsLocal = bs;
652         ierr    = MPIU_Allreduce(&bsLocal, &bs, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)da));CHKERRQ(ierr);
653       }
654       ierr = PetscCalloc4(localSize/bs, &dnz, localSize/bs, &onz, localSize/bs, &dnzu, localSize/bs, &onzu);CHKERRQ(ierr);
655       /* ierr = DMPlexPreallocateOperator(dm, bs, section, sectionGlobal, dnz, onz, dnzu, onzu, *J, fillMatrix);CHKERRQ(ierr); */
656       ierr = PetscFree4(dnz, onz, dnzu, onzu);CHKERRQ(ierr);
657     }
658   }
659   /*
660                                   m
661           ------------------------------------------------------
662          |                                                     |
663          |                                                     |
664          |               ----------------------                |
665          |               |                    |                |
666       n  |           ny  |                    |                |
667          |               |                    |                |
668          |               .---------------------                |
669          |             (xs,ys)     nx                          |
670          |            .                                        |
671          |         (gxs,gys)                                   |
672          |                                                     |
673           -----------------------------------------------------
674   */
675 
676   /*
677          nc - number of components per grid point
678          col - number of colors needed in one direction for single component problem
679 
680   */
681   M   = dd->M;
682   N   = dd->N;
683   P   = dd->P;
684   dim = da->dim;
685   dof = dd->w;
686   /* ierr = DMDAGetInfo(da,&dim,&M,&N,&P,0,0,0,&dof,0,0,0,0,0);CHKERRQ(ierr); */
687   ierr = DMDAGetCorners(da,0,0,0,&nx,&ny,&nz);CHKERRQ(ierr);
688   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
689   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
690   ierr = MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);CHKERRQ(ierr);
691   ierr = MatSetType(A,mtype);CHKERRQ(ierr);
692   ierr = MatSetDM(A,da);CHKERRQ(ierr);
693   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
694   ierr = MatGetType(A,&Atype);CHKERRQ(ierr);
695   /*
696      We do not provide a getmatrix function in the DMDA operations because
697    the basic DMDA does not know about matrices. We think of DMDA as being more
698    more low-level than matrices. This is kind of cheating but, cause sometimes
699    we think of DMDA has higher level than matrices.
700 
701      We could switch based on Atype (or mtype), but we do not since the
702    specialized setting routines depend only the particular preallocation
703    details of the matrix, not the type itself.
704   */
705   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
706   if (!aij) {
707     ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
708   }
709   if (!aij) {
710     ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
711     if (!baij) {
712       ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
713     }
714     if (!baij) {
715       ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
716       if (!sbaij) {
717         ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
718       }
719     }
720   }
721   if (aij) {
722     if (dim == 1) {
723       if (dd->ofill) {
724         ierr = DMCreateMatrix_DA_1d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
725       } else {
726         ierr = DMCreateMatrix_DA_1d_MPIAIJ(da,A);CHKERRQ(ierr);
727       }
728     } else if (dim == 2) {
729       if (dd->ofill) {
730         ierr = DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
731       } else {
732         ierr = DMCreateMatrix_DA_2d_MPIAIJ(da,A);CHKERRQ(ierr);
733       }
734     } else if (dim == 3) {
735       if (dd->ofill) {
736         ierr = DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
737       } else {
738         ierr = DMCreateMatrix_DA_3d_MPIAIJ(da,A);CHKERRQ(ierr);
739       }
740     }
741   } else if (baij) {
742     if (dim == 2) {
743       ierr = DMCreateMatrix_DA_2d_MPIBAIJ(da,A);CHKERRQ(ierr);
744     } else if (dim == 3) {
745       ierr = DMCreateMatrix_DA_3d_MPIBAIJ(da,A);CHKERRQ(ierr);
746     } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
747   } else if (sbaij) {
748     if (dim == 2) {
749       ierr = DMCreateMatrix_DA_2d_MPISBAIJ(da,A);CHKERRQ(ierr);
750     } else if (dim == 3) {
751       ierr = DMCreateMatrix_DA_3d_MPISBAIJ(da,A);CHKERRQ(ierr);
752     } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
753   } else {
754     ISLocalToGlobalMapping ltog;
755     ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
756     ierr = MatSetUp(A);CHKERRQ(ierr);
757     ierr = MatSetLocalToGlobalMapping(A,ltog,ltog);CHKERRQ(ierr);
758   }
759   ierr = DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);CHKERRQ(ierr);
760   ierr = MatSetStencil(A,dim,dims,starts,dof);CHKERRQ(ierr);
761   ierr = MatSetDM(A,da);CHKERRQ(ierr);
762   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
763   if (size > 1) {
764     /* change viewer to display matrix in natural ordering */
765     ierr = MatShellSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA);CHKERRQ(ierr);
766     ierr = MatShellSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA);CHKERRQ(ierr);
767   }
768   *J = A;
769   PetscFunctionReturn(0);
770 }
771 
772 /* ---------------------------------------------------------------------------------*/
773 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J)
774 {
775   PetscErrorCode         ierr;
776   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,M,N;
777   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
778   MPI_Comm               comm;
779   PetscScalar            *values;
780   DMBoundaryType         bx,by;
781   ISLocalToGlobalMapping ltog;
782   DMDAStencilType        st;
783   PetscBool              removedups = PETSC_FALSE;
784 
785   PetscFunctionBegin;
786   /*
787          nc - number of components per grid point
788          col - number of colors needed in one direction for single component problem
789 
790   */
791   ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
792   col  = 2*s + 1;
793   /*
794        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
795        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
796   */
797   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
798   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
799   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
800   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
801   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
802 
803   ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr);
804   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
805 
806   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
807   /* determine the matrix preallocation information */
808   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
809   for (i=xs; i<xs+nx; i++) {
810 
811     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
812     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
813 
814     for (j=ys; j<ys+ny; j++) {
815       slot = i - gxs + gnx*(j - gys);
816 
817       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
818       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
819 
820       cnt = 0;
821       for (k=0; k<nc; k++) {
822         for (l=lstart; l<lend+1; l++) {
823           for (p=pstart; p<pend+1; p++) {
824             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
825               cols[cnt++] = k + nc*(slot + gnx*l + p);
826             }
827           }
828         }
829         rows[k] = k + nc*(slot);
830       }
831       if (removedups) {
832         ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
833       } else {
834         ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
835       }
836     }
837   }
838   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
839   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
840   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
841   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
842 
843   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
844 
845   /*
846     For each node in the grid: we get the neighbors in the local (on processor ordering
847     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
848     PETSc ordering.
849   */
850   if (!da->prealloc_only) {
851     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
852     for (i=xs; i<xs+nx; i++) {
853 
854       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
855       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
856 
857       for (j=ys; j<ys+ny; j++) {
858         slot = i - gxs + gnx*(j - gys);
859 
860         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
861         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
862 
863         cnt = 0;
864         for (k=0; k<nc; k++) {
865           for (l=lstart; l<lend+1; l++) {
866             for (p=pstart; p<pend+1; p++) {
867               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
868                 cols[cnt++] = k + nc*(slot + gnx*l + p);
869               }
870             }
871           }
872           rows[k] = k + nc*(slot);
873         }
874         ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
875       }
876     }
877     ierr = PetscFree(values);CHKERRQ(ierr);
878     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
879     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
880     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
881   }
882   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
883   PetscFunctionReturn(0);
884 }
885 
886 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)
887 {
888   PetscErrorCode         ierr;
889   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
890   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,maxcnt = 0,l,p,M,N;
891   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
892   DM_DA                  *dd = (DM_DA*)da->data;
893   PetscInt               ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
894   MPI_Comm               comm;
895   PetscScalar            *values;
896   DMBoundaryType         bx,by;
897   ISLocalToGlobalMapping ltog;
898   DMDAStencilType        st;
899   PetscBool              removedups = PETSC_FALSE;
900 
901   PetscFunctionBegin;
902   /*
903          nc - number of components per grid point
904          col - number of colors needed in one direction for single component problem
905 
906   */
907   ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
908   col  = 2*s + 1;
909   /*
910        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
911        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
912   */
913   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
914   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
915   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
916   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
917   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
918 
919   ierr = PetscMalloc1(col*col*nc,&cols);CHKERRQ(ierr);
920   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
921 
922   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
923   /* determine the matrix preallocation information */
924   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
925   for (i=xs; i<xs+nx; i++) {
926 
927     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
928     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
929 
930     for (j=ys; j<ys+ny; j++) {
931       slot = i - gxs + gnx*(j - gys);
932 
933       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
934       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
935 
936       for (k=0; k<nc; k++) {
937         cnt = 0;
938         for (l=lstart; l<lend+1; l++) {
939           for (p=pstart; p<pend+1; p++) {
940             if (l || p) {
941               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
942                 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
943               }
944             } else {
945               if (dfill) {
946                 for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
947               } else {
948                 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
949               }
950             }
951           }
952         }
953         row    = k + nc*(slot);
954         maxcnt = PetscMax(maxcnt,cnt);
955         if (removedups) {
956           ierr   = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
957         } else {
958           ierr   = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
959         }
960       }
961     }
962   }
963   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
964   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
965   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
966   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
967 
968   /*
969     For each node in the grid: we get the neighbors in the local (on processor ordering
970     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
971     PETSc ordering.
972   */
973   if (!da->prealloc_only) {
974     ierr = PetscCalloc1(maxcnt,&values);CHKERRQ(ierr);
975     for (i=xs; i<xs+nx; i++) {
976 
977       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
978       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
979 
980       for (j=ys; j<ys+ny; j++) {
981         slot = i - gxs + gnx*(j - gys);
982 
983         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
984         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
985 
986         for (k=0; k<nc; k++) {
987           cnt = 0;
988           for (l=lstart; l<lend+1; l++) {
989             for (p=pstart; p<pend+1; p++) {
990               if (l || p) {
991                 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
992                   for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
993                 }
994               } else {
995                 if (dfill) {
996                   for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
997                 } else {
998                   for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
999                 }
1000               }
1001             }
1002           }
1003           row  = k + nc*(slot);
1004           ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1005         }
1006       }
1007     }
1008     ierr = PetscFree(values);CHKERRQ(ierr);
1009     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1010     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1011     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1012   }
1013   ierr = PetscFree(cols);CHKERRQ(ierr);
1014   PetscFunctionReturn(0);
1015 }
1016 
1017 /* ---------------------------------------------------------------------------------*/
1018 
1019 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J)
1020 {
1021   PetscErrorCode         ierr;
1022   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1023   PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
1024   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
1025   MPI_Comm               comm;
1026   PetscScalar            *values;
1027   DMBoundaryType         bx,by,bz;
1028   ISLocalToGlobalMapping ltog;
1029   DMDAStencilType        st;
1030   PetscBool              removedups = PETSC_FALSE;
1031 
1032   PetscFunctionBegin;
1033   /*
1034          nc - number of components per grid point
1035          col - number of colors needed in one direction for single component problem
1036 
1037   */
1038   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1039   col  = 2*s + 1;
1040 
1041   /*
1042        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1043        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1044   */
1045   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1046   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1047   if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;
1048 
1049   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1050   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1051   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1052 
1053   ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr);
1054   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1055 
1056   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1057   /* determine the matrix preallocation information */
1058   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1059   for (i=xs; i<xs+nx; i++) {
1060     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1061     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1062     for (j=ys; j<ys+ny; j++) {
1063       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1064       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1065       for (k=zs; k<zs+nz; k++) {
1066         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1067         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1068 
1069         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1070 
1071         cnt = 0;
1072         for (l=0; l<nc; l++) {
1073           for (ii=istart; ii<iend+1; ii++) {
1074             for (jj=jstart; jj<jend+1; jj++) {
1075               for (kk=kstart; kk<kend+1; kk++) {
1076                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1077                   cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1078                 }
1079               }
1080             }
1081           }
1082           rows[l] = l + nc*(slot);
1083         }
1084         if (removedups) {
1085           ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1086         } else {
1087           ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1088         }
1089       }
1090     }
1091   }
1092   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1093   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1094   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1095   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1096   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1097 
1098   /*
1099     For each node in the grid: we get the neighbors in the local (on processor ordering
1100     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1101     PETSc ordering.
1102   */
1103   if (!da->prealloc_only) {
1104     ierr = PetscCalloc1(col*col*col*nc*nc*nc,&values);CHKERRQ(ierr);
1105     for (i=xs; i<xs+nx; i++) {
1106       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1107       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1108       for (j=ys; j<ys+ny; j++) {
1109         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1110         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1111         for (k=zs; k<zs+nz; k++) {
1112           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1113           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1114 
1115           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1116 
1117           cnt = 0;
1118           for (l=0; l<nc; l++) {
1119             for (ii=istart; ii<iend+1; ii++) {
1120               for (jj=jstart; jj<jend+1; jj++) {
1121                 for (kk=kstart; kk<kend+1; kk++) {
1122                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1123                     cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1124                   }
1125                 }
1126               }
1127             }
1128             rows[l] = l + nc*(slot);
1129           }
1130           ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1131         }
1132       }
1133     }
1134     ierr = PetscFree(values);CHKERRQ(ierr);
1135     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1136     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1137     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1138   }
1139   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1140   PetscFunctionReturn(0);
1141 }
1142 
1143 /* ---------------------------------------------------------------------------------*/
1144 
1145 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J)
1146 {
1147   PetscErrorCode         ierr;
1148   DM_DA                  *dd = (DM_DA*)da->data;
1149   PetscInt               xs,nx,i,j,gxs,gnx,row,k,l;
1150   PetscInt               m,dim,s,*cols = NULL,nc,cnt,maxcnt = 0,*ocols;
1151   PetscInt               *ofill = dd->ofill,*dfill = dd->dfill;
1152   PetscScalar            *values;
1153   DMBoundaryType         bx;
1154   ISLocalToGlobalMapping ltog;
1155   PetscMPIInt            rank,size;
1156 
1157   PetscFunctionBegin;
1158   if (dd->bx == DM_BOUNDARY_PERIODIC) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"With fill provided not implemented with periodic boundary conditions");
1159   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)da),&rank);CHKERRQ(ierr);
1160   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRQ(ierr);
1161 
1162   /*
1163          nc - number of components per grid point
1164 
1165   */
1166   ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
1167   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
1168   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
1169 
1170   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1171   ierr = PetscCalloc2(nx*nc,&cols,nx*nc,&ocols);CHKERRQ(ierr);
1172 
1173   if (nx < 2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Need at least two grid points per process");
1174   /*
1175         note should be smaller for first and last process with no periodic
1176         does not handle dfill
1177   */
1178   cnt = 0;
1179   /* coupling with process to the left */
1180   for (i=0; i<s; i++) {
1181     for (j=0; j<nc; j++) {
1182       ocols[cnt] = ((!rank) ? 0 : (s - i)*(ofill[j+1] - ofill[j]));
1183       cols[cnt]  = dfill[j+1] - dfill[j] + (s + i)*(ofill[j+1] - ofill[j]);
1184       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1185       cnt++;
1186     }
1187   }
1188   for (i=s; i<nx-s; i++) {
1189     for (j=0; j<nc; j++) {
1190       cols[cnt] = dfill[j+1] - dfill[j] + 2*s*(ofill[j+1] - ofill[j]);
1191       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1192       cnt++;
1193     }
1194   }
1195   /* coupling with process to the right */
1196   for (i=nx-s; i<nx; i++) {
1197     for (j=0; j<nc; j++) {
1198       ocols[cnt] = ((rank == (size-1)) ? 0 : (i - nx + s + 1)*(ofill[j+1] - ofill[j]));
1199       cols[cnt]  = dfill[j+1] - dfill[j] + (s + nx - i - 1)*(ofill[j+1] - ofill[j]);
1200       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1201       cnt++;
1202     }
1203   }
1204 
1205   ierr = MatSeqAIJSetPreallocation(J,0,cols);CHKERRQ(ierr);
1206   ierr = MatMPIAIJSetPreallocation(J,0,cols,0,ocols);CHKERRQ(ierr);
1207   ierr = PetscFree2(cols,ocols);CHKERRQ(ierr);
1208 
1209   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1210   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1211 
1212   /*
1213     For each node in the grid: we get the neighbors in the local (on processor ordering
1214     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1215     PETSc ordering.
1216   */
1217   if (!da->prealloc_only) {
1218     ierr = PetscCalloc2(maxcnt,&values,maxcnt,&cols);CHKERRQ(ierr);
1219 
1220     row = xs*nc;
1221     /* coupling with process to the left */
1222     for (i=xs; i<xs+s; i++) {
1223       for (j=0; j<nc; j++) {
1224         cnt = 0;
1225         if (rank) {
1226           for (l=0; l<s; l++) {
1227             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1228           }
1229         }
1230         if (dfill) {
1231           for (k=dfill[j]; k<dfill[j+1]; k++) {
1232             cols[cnt++] = i*nc + dfill[k];
1233           }
1234         } else {
1235           for (k=0; k<nc; k++) {
1236             cols[cnt++] = i*nc + k;
1237           }
1238         }
1239         for (l=0; l<s; l++) {
1240           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1241         }
1242         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1243         row++;
1244       }
1245     }
1246     for (i=xs+s; i<xs+nx-s; i++) {
1247       for (j=0; j<nc; j++) {
1248         cnt = 0;
1249         for (l=0; l<s; l++) {
1250           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1251         }
1252         if (dfill) {
1253           for (k=dfill[j]; k<dfill[j+1]; k++) {
1254             cols[cnt++] = i*nc + dfill[k];
1255           }
1256         } else {
1257           for (k=0; k<nc; k++) {
1258             cols[cnt++] = i*nc + k;
1259           }
1260         }
1261         for (l=0; l<s; l++) {
1262           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1263         }
1264         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1265         row++;
1266       }
1267     }
1268     /* coupling with process to the right */
1269     for (i=xs+nx-s; i<xs+nx; i++) {
1270       for (j=0; j<nc; j++) {
1271         cnt = 0;
1272         for (l=0; l<s; l++) {
1273           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1274         }
1275         if (dfill) {
1276           for (k=dfill[j]; k<dfill[j+1]; k++) {
1277             cols[cnt++] = i*nc + dfill[k];
1278           }
1279         } else {
1280           for (k=0; k<nc; k++) {
1281             cols[cnt++] = i*nc + k;
1282           }
1283         }
1284         if (rank < size-1) {
1285           for (l=0; l<s; l++) {
1286             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1287           }
1288         }
1289         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1290         row++;
1291       }
1292     }
1293     ierr = PetscFree2(values,cols);CHKERRQ(ierr);
1294     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1295     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1296     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1297   }
1298   PetscFunctionReturn(0);
1299 }
1300 
1301 /* ---------------------------------------------------------------------------------*/
1302 
1303 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J)
1304 {
1305   PetscErrorCode         ierr;
1306   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1307   PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
1308   PetscInt               istart,iend;
1309   PetscScalar            *values;
1310   DMBoundaryType         bx;
1311   ISLocalToGlobalMapping ltog;
1312 
1313   PetscFunctionBegin;
1314   /*
1315          nc - number of components per grid point
1316          col - number of colors needed in one direction for single component problem
1317 
1318   */
1319   ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
1320   col  = 2*s + 1;
1321 
1322   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
1323   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
1324 
1325   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1326   ierr = MatSeqAIJSetPreallocation(J,col*nc,0);CHKERRQ(ierr);
1327   ierr = MatMPIAIJSetPreallocation(J,col*nc,0,col*nc,0);CHKERRQ(ierr);
1328 
1329   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1330   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1331 
1332   /*
1333     For each node in the grid: we get the neighbors in the local (on processor ordering
1334     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1335     PETSc ordering.
1336   */
1337   if (!da->prealloc_only) {
1338     ierr = PetscMalloc2(nc,&rows,col*nc*nc,&cols);CHKERRQ(ierr);
1339     ierr = PetscCalloc1(col*nc*nc,&values);CHKERRQ(ierr);
1340     for (i=xs; i<xs+nx; i++) {
1341       istart = PetscMax(-s,gxs - i);
1342       iend   = PetscMin(s,gxs + gnx - i - 1);
1343       slot   = i - gxs;
1344 
1345       cnt = 0;
1346       for (l=0; l<nc; l++) {
1347         for (i1=istart; i1<iend+1; i1++) {
1348           cols[cnt++] = l + nc*(slot + i1);
1349         }
1350         rows[l] = l + nc*(slot);
1351       }
1352       ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1353     }
1354     ierr = PetscFree(values);CHKERRQ(ierr);
1355     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1356     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1357     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1358     ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1359   }
1360   PetscFunctionReturn(0);
1361 }
1362 
1363 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
1364 {
1365   PetscErrorCode         ierr;
1366   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1367   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1368   PetscInt               istart,iend,jstart,jend,ii,jj;
1369   MPI_Comm               comm;
1370   PetscScalar            *values;
1371   DMBoundaryType         bx,by;
1372   DMDAStencilType        st;
1373   ISLocalToGlobalMapping ltog;
1374 
1375   PetscFunctionBegin;
1376   /*
1377      nc - number of components per grid point
1378      col - number of colors needed in one direction for single component problem
1379   */
1380   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1381   col  = 2*s + 1;
1382 
1383   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1384   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1385   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1386 
1387   ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr);
1388 
1389   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1390 
1391   /* determine the matrix preallocation information */
1392   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1393   for (i=xs; i<xs+nx; i++) {
1394     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1395     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1396     for (j=ys; j<ys+ny; j++) {
1397       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1398       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1399       slot   = i - gxs + gnx*(j - gys);
1400 
1401       /* Find block columns in block row */
1402       cnt = 0;
1403       for (ii=istart; ii<iend+1; ii++) {
1404         for (jj=jstart; jj<jend+1; jj++) {
1405           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1406             cols[cnt++] = slot + ii + gnx*jj;
1407           }
1408         }
1409       }
1410       ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1411     }
1412   }
1413   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1414   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1415   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1416 
1417   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1418 
1419   /*
1420     For each node in the grid: we get the neighbors in the local (on processor ordering
1421     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1422     PETSc ordering.
1423   */
1424   if (!da->prealloc_only) {
1425     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
1426     for (i=xs; i<xs+nx; i++) {
1427       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1428       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1429       for (j=ys; j<ys+ny; j++) {
1430         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1431         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1432         slot = i - gxs + gnx*(j - gys);
1433         cnt  = 0;
1434         for (ii=istart; ii<iend+1; ii++) {
1435           for (jj=jstart; jj<jend+1; jj++) {
1436             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1437               cols[cnt++] = slot + ii + gnx*jj;
1438             }
1439           }
1440         }
1441         ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1442       }
1443     }
1444     ierr = PetscFree(values);CHKERRQ(ierr);
1445     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1446     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1447     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1448   }
1449   ierr = PetscFree(cols);CHKERRQ(ierr);
1450   PetscFunctionReturn(0);
1451 }
1452 
1453 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
1454 {
1455   PetscErrorCode         ierr;
1456   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1457   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1458   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1459   MPI_Comm               comm;
1460   PetscScalar            *values;
1461   DMBoundaryType         bx,by,bz;
1462   DMDAStencilType        st;
1463   ISLocalToGlobalMapping ltog;
1464 
1465   PetscFunctionBegin;
1466   /*
1467          nc - number of components per grid point
1468          col - number of colors needed in one direction for single component problem
1469 
1470   */
1471   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1472   col  = 2*s + 1;
1473 
1474   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1475   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1476   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1477 
1478   ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr);
1479 
1480   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1481 
1482   /* determine the matrix preallocation information */
1483   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1484   for (i=xs; i<xs+nx; i++) {
1485     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1486     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1487     for (j=ys; j<ys+ny; j++) {
1488       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1489       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1490       for (k=zs; k<zs+nz; k++) {
1491         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1492         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1493 
1494         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1495 
1496         /* Find block columns in block row */
1497         cnt = 0;
1498         for (ii=istart; ii<iend+1; ii++) {
1499           for (jj=jstart; jj<jend+1; jj++) {
1500             for (kk=kstart; kk<kend+1; kk++) {
1501               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1502                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1503               }
1504             }
1505           }
1506         }
1507         ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1508       }
1509     }
1510   }
1511   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1512   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1513   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1514 
1515   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1516 
1517   /*
1518     For each node in the grid: we get the neighbors in the local (on processor ordering
1519     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1520     PETSc ordering.
1521   */
1522   if (!da->prealloc_only) {
1523     ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr);
1524     for (i=xs; i<xs+nx; i++) {
1525       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1526       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1527       for (j=ys; j<ys+ny; j++) {
1528         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1529         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1530         for (k=zs; k<zs+nz; k++) {
1531           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1532           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1533 
1534           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1535 
1536           cnt = 0;
1537           for (ii=istart; ii<iend+1; ii++) {
1538             for (jj=jstart; jj<jend+1; jj++) {
1539               for (kk=kstart; kk<kend+1; kk++) {
1540                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1541                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1542                 }
1543               }
1544             }
1545           }
1546           ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1547         }
1548       }
1549     }
1550     ierr = PetscFree(values);CHKERRQ(ierr);
1551     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1552     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1553     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1554   }
1555   ierr = PetscFree(cols);CHKERRQ(ierr);
1556   PetscFunctionReturn(0);
1557 }
1558 
1559 /*
1560   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
1561   identify in the local ordering with periodic domain.
1562 */
1563 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[])
1564 {
1565   PetscErrorCode ierr;
1566   PetscInt       i,n;
1567 
1568   PetscFunctionBegin;
1569   ierr = ISLocalToGlobalMappingApplyBlock(ltog,1,row,row);CHKERRQ(ierr);
1570   ierr = ISLocalToGlobalMappingApplyBlock(ltog,*cnt,col,col);CHKERRQ(ierr);
1571   for (i=0,n=0; i<*cnt; i++) {
1572     if (col[i] >= *row) col[n++] = col[i];
1573   }
1574   *cnt = n;
1575   PetscFunctionReturn(0);
1576 }
1577 
1578 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J)
1579 {
1580   PetscErrorCode         ierr;
1581   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1582   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1583   PetscInt               istart,iend,jstart,jend,ii,jj;
1584   MPI_Comm               comm;
1585   PetscScalar            *values;
1586   DMBoundaryType         bx,by;
1587   DMDAStencilType        st;
1588   ISLocalToGlobalMapping ltog;
1589 
1590   PetscFunctionBegin;
1591   /*
1592      nc - number of components per grid point
1593      col - number of colors needed in one direction for single component problem
1594   */
1595   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1596   col  = 2*s + 1;
1597 
1598   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1599   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1600   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1601 
1602   ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr);
1603 
1604   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1605 
1606   /* determine the matrix preallocation information */
1607   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1608   for (i=xs; i<xs+nx; i++) {
1609     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1610     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1611     for (j=ys; j<ys+ny; j++) {
1612       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1613       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1614       slot   = i - gxs + gnx*(j - gys);
1615 
1616       /* Find block columns in block row */
1617       cnt = 0;
1618       for (ii=istart; ii<iend+1; ii++) {
1619         for (jj=jstart; jj<jend+1; jj++) {
1620           if (st == DMDA_STENCIL_BOX || !ii || !jj) {
1621             cols[cnt++] = slot + ii + gnx*jj;
1622           }
1623         }
1624       }
1625       ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
1626       ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
1627     }
1628   }
1629   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1630   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1631   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1632 
1633   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1634 
1635   /*
1636     For each node in the grid: we get the neighbors in the local (on processor ordering
1637     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1638     PETSc ordering.
1639   */
1640   if (!da->prealloc_only) {
1641     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
1642     for (i=xs; i<xs+nx; i++) {
1643       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1644       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1645       for (j=ys; j<ys+ny; j++) {
1646         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1647         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1648         slot   = i - gxs + gnx*(j - gys);
1649 
1650         /* Find block columns in block row */
1651         cnt = 0;
1652         for (ii=istart; ii<iend+1; ii++) {
1653           for (jj=jstart; jj<jend+1; jj++) {
1654             if (st == DMDA_STENCIL_BOX || !ii || !jj) {
1655               cols[cnt++] = slot + ii + gnx*jj;
1656             }
1657           }
1658         }
1659         ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
1660         ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1661       }
1662     }
1663     ierr = PetscFree(values);CHKERRQ(ierr);
1664     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1665     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1666     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1667   }
1668   ierr = PetscFree(cols);CHKERRQ(ierr);
1669   PetscFunctionReturn(0);
1670 }
1671 
1672 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J)
1673 {
1674   PetscErrorCode         ierr;
1675   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1676   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1677   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1678   MPI_Comm               comm;
1679   PetscScalar            *values;
1680   DMBoundaryType         bx,by,bz;
1681   DMDAStencilType        st;
1682   ISLocalToGlobalMapping ltog;
1683 
1684   PetscFunctionBegin;
1685   /*
1686      nc - number of components per grid point
1687      col - number of colors needed in one direction for single component problem
1688   */
1689   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1690   col  = 2*s + 1;
1691 
1692   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1693   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1694   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1695 
1696   /* create the matrix */
1697   ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr);
1698 
1699   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1700 
1701   /* determine the matrix preallocation information */
1702   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1703   for (i=xs; i<xs+nx; i++) {
1704     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1705     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1706     for (j=ys; j<ys+ny; j++) {
1707       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1708       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1709       for (k=zs; k<zs+nz; k++) {
1710         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1711         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1712 
1713         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1714 
1715         /* Find block columns in block row */
1716         cnt = 0;
1717         for (ii=istart; ii<iend+1; ii++) {
1718           for (jj=jstart; jj<jend+1; jj++) {
1719             for (kk=kstart; kk<kend+1; kk++) {
1720               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
1721                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1722               }
1723             }
1724           }
1725         }
1726         ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
1727         ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
1728       }
1729     }
1730   }
1731   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1732   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1733   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1734 
1735   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1736 
1737   /*
1738     For each node in the grid: we get the neighbors in the local (on processor ordering
1739     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1740     PETSc ordering.
1741   */
1742   if (!da->prealloc_only) {
1743     ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr);
1744     for (i=xs; i<xs+nx; i++) {
1745       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1746       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1747       for (j=ys; j<ys+ny; j++) {
1748         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1749         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1750         for (k=zs; k<zs+nz; k++) {
1751           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1752           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1753 
1754           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1755 
1756           cnt = 0;
1757           for (ii=istart; ii<iend+1; ii++) {
1758             for (jj=jstart; jj<jend+1; jj++) {
1759               for (kk=kstart; kk<kend+1; kk++) {
1760                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
1761                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1762                 }
1763               }
1764             }
1765           }
1766           ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
1767           ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1768         }
1769       }
1770     }
1771     ierr = PetscFree(values);CHKERRQ(ierr);
1772     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1773     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1774     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1775   }
1776   ierr = PetscFree(cols);CHKERRQ(ierr);
1777   PetscFunctionReturn(0);
1778 }
1779 
1780 /* ---------------------------------------------------------------------------------*/
1781 
1782 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J)
1783 {
1784   PetscErrorCode         ierr;
1785   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1786   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt, maxcnt = 0,l,p,*dnz,*onz;
1787   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
1788   DM_DA                  *dd = (DM_DA*)da->data;
1789   PetscInt               ifill_col,*dfill = dd->dfill,*ofill = dd->ofill;
1790   MPI_Comm               comm;
1791   PetscScalar            *values;
1792   DMBoundaryType         bx,by,bz;
1793   ISLocalToGlobalMapping ltog;
1794   DMDAStencilType        st;
1795   PetscBool              removedups = PETSC_FALSE;
1796 
1797   PetscFunctionBegin;
1798   /*
1799          nc - number of components per grid point
1800          col - number of colors needed in one direction for single component problem
1801 
1802   */
1803   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1804   col  = 2*s + 1;
1805   if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
1806                  by 2*stencil_width + 1\n");
1807   if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
1808                  by 2*stencil_width + 1\n");
1809   if (bz == DM_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
1810                  by 2*stencil_width + 1\n");
1811 
1812   /*
1813        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1814        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1815   */
1816   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1817   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1818   if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;
1819 
1820   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1821   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1822   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1823 
1824   ierr = PetscMalloc1(col*col*col*nc,&cols);CHKERRQ(ierr);
1825   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1826 
1827   /* determine the matrix preallocation information */
1828   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1829 
1830   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1831   for (i=xs; i<xs+nx; i++) {
1832     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1833     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1834     for (j=ys; j<ys+ny; j++) {
1835       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1836       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1837       for (k=zs; k<zs+nz; k++) {
1838         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1839         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1840 
1841         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1842 
1843         for (l=0; l<nc; l++) {
1844           cnt = 0;
1845           for (ii=istart; ii<iend+1; ii++) {
1846             for (jj=jstart; jj<jend+1; jj++) {
1847               for (kk=kstart; kk<kend+1; kk++) {
1848                 if (ii || jj || kk) {
1849                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1850                     for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1851                   }
1852                 } else {
1853                   if (dfill) {
1854                     for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1855                   } else {
1856                     for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1857                   }
1858                 }
1859               }
1860             }
1861           }
1862           row  = l + nc*(slot);
1863           maxcnt = PetscMax(maxcnt,cnt);
1864           if (removedups) {
1865             ierr = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1866           } else {
1867             ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1868           }
1869         }
1870       }
1871     }
1872   }
1873   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1874   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1875   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1876   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1877 
1878   /*
1879     For each node in the grid: we get the neighbors in the local (on processor ordering
1880     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1881     PETSc ordering.
1882   */
1883   if (!da->prealloc_only) {
1884     ierr = PetscCalloc1(maxcnt,&values);CHKERRQ(ierr);
1885     for (i=xs; i<xs+nx; i++) {
1886       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1887       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1888       for (j=ys; j<ys+ny; j++) {
1889         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1890         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1891         for (k=zs; k<zs+nz; k++) {
1892           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1893           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1894 
1895           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1896 
1897           for (l=0; l<nc; l++) {
1898             cnt = 0;
1899             for (ii=istart; ii<iend+1; ii++) {
1900               for (jj=jstart; jj<jend+1; jj++) {
1901                 for (kk=kstart; kk<kend+1; kk++) {
1902                   if (ii || jj || kk) {
1903                     if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1904                       for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1905                     }
1906                   } else {
1907                     if (dfill) {
1908                       for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1909                     } else {
1910                       for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1911                     }
1912                   }
1913                 }
1914               }
1915             }
1916             row  = l + nc*(slot);
1917             ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1918           }
1919         }
1920       }
1921     }
1922     ierr = PetscFree(values);CHKERRQ(ierr);
1923     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1924     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1925     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1926   }
1927   ierr = PetscFree(cols);CHKERRQ(ierr);
1928   PetscFunctionReturn(0);
1929 }
1930