xref: /petsc/src/dm/impls/da/fdda.c (revision c4e6da2c21f60ca208d50527ab05cf373da374c0)
1 
2 #include <petsc/private/dmdaimpl.h> /*I      "petscdmda.h"     I*/
3 #include <petscmat.h>
4 
5 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring*);
6 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring*);
7 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring*);
8 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring*);
9 
10 /*
11    For ghost i that may be negative or greater than the upper bound this
12   maps it into the 0:m-1 range using periodicity
13 */
14 #define SetInRange(i,m) ((i < 0) ? m+i : ((i >= m) ? i-m : i))
15 
16 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill,PetscInt w,PetscInt **rfill)
17 {
18   PetscErrorCode ierr;
19   PetscInt       i,j,nz,*fill;
20 
21   PetscFunctionBegin;
22   if (!dfill) PetscFunctionReturn(0);
23 
24   /* count number nonzeros */
25   nz = 0;
26   for (i=0; i<w; i++) {
27     for (j=0; j<w; j++) {
28       if (dfill[w*i+j]) nz++;
29     }
30   }
31   ierr = PetscMalloc1(nz + w + 1,&fill);CHKERRQ(ierr);
32   /* construct modified CSR storage of nonzero structure */
33   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
34    so fill[1] - fill[0] gives number of nonzeros in first row etc */
35   nz = w + 1;
36   for (i=0; i<w; i++) {
37     fill[i] = nz;
38     for (j=0; j<w; j++) {
39       if (dfill[w*i+j]) {
40         fill[nz] = j;
41         nz++;
42       }
43     }
44   }
45   fill[w] = nz;
46 
47   *rfill = fill;
48   PetscFunctionReturn(0);
49 }
50 
51 /*@
52     DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
53     of the matrix returned by DMCreateMatrix().
54 
55     Logically Collective on DMDA
56 
57     Input Parameter:
58 +   da - the distributed array
59 .   dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
60 -   ofill - the fill pattern in the off-diagonal blocks
61 
62 
63     Level: developer
64 
65     Notes:
66     This only makes sense when you are doing multicomponent problems but using the
67        MPIAIJ matrix format
68 
69            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
70        representing coupling and 0 entries for missing coupling. For example
71 $             dfill[9] = {1, 0, 0,
72 $                         1, 1, 0,
73 $                         0, 1, 1}
74        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
75        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
76        diagonal block).
77 
78      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
79      can be represented in the dfill, ofill format
80 
81    Contributed by Glenn Hammond
82 
83 .seealso DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()
84 
85 @*/
86 PetscErrorCode  DMDASetBlockFills(DM da,const PetscInt *dfill,const PetscInt *ofill)
87 {
88   DM_DA          *dd = (DM_DA*)da->data;
89   PetscErrorCode ierr;
90   PetscInt       i,k,cnt = 1;
91 
92   PetscFunctionBegin;
93   ierr = DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);CHKERRQ(ierr);
94   ierr = DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);CHKERRQ(ierr);
95 
96   /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of
97    columns to the left with any nonzeros in them plus 1 */
98   ierr = PetscCalloc1(dd->w,&dd->ofillcols);CHKERRQ(ierr);
99   for (i=0; i<dd->w; i++) {
100     for (k=dd->ofill[i]; k<dd->ofill[i+1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
101   }
102   for (i=0; i<dd->w; i++) {
103     if (dd->ofillcols[i]) {
104       dd->ofillcols[i] = cnt++;
105     }
106   }
107   PetscFunctionReturn(0);
108 }
109 
110 
111 PetscErrorCode  DMCreateColoring_DA(DM da,ISColoringType ctype,ISColoring *coloring)
112 {
113   PetscErrorCode   ierr;
114   PetscInt         dim,m,n,p,nc;
115   DMBoundaryType bx,by,bz;
116   MPI_Comm         comm;
117   PetscMPIInt      size;
118   PetscBool        isBAIJ;
119   DM_DA            *dd = (DM_DA*)da->data;
120 
121   PetscFunctionBegin;
122   /*
123                                   m
124           ------------------------------------------------------
125          |                                                     |
126          |                                                     |
127          |               ----------------------                |
128          |               |                    |                |
129       n  |           yn  |                    |                |
130          |               |                    |                |
131          |               .---------------------                |
132          |             (xs,ys)     xn                          |
133          |            .                                        |
134          |         (gxs,gys)                                   |
135          |                                                     |
136           -----------------------------------------------------
137   */
138 
139   /*
140          nc - number of components per grid point
141          col - number of colors needed in one direction for single component problem
142 
143   */
144   ierr = DMDAGetInfo(da,&dim,0,0,0,&m,&n,&p,&nc,0,&bx,&by,&bz,0);CHKERRQ(ierr);
145 
146   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
147   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
148   if (ctype == IS_COLORING_LOCAL) {
149     if (size == 1) {
150       ctype = IS_COLORING_GLOBAL;
151     } else if (dim > 1) {
152       if ((m==1 && bx == DM_BOUNDARY_PERIODIC) || (n==1 && by == DM_BOUNDARY_PERIODIC) || (p==1 && bz == DM_BOUNDARY_PERIODIC)) {
153         SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain  on the same process");
154       }
155     }
156   }
157 
158   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
159      matrices is for the blocks, not the individual matrix elements  */
160   ierr = PetscStrcmp(da->mattype,MATBAIJ,&isBAIJ);CHKERRQ(ierr);
161   if (!isBAIJ) {ierr = PetscStrcmp(da->mattype,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);}
162   if (!isBAIJ) {ierr = PetscStrcmp(da->mattype,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);}
163   if (isBAIJ) {
164     dd->w  = 1;
165     dd->xs = dd->xs/nc;
166     dd->xe = dd->xe/nc;
167     dd->Xs = dd->Xs/nc;
168     dd->Xe = dd->Xe/nc;
169   }
170 
171   /*
172      We do not provide a getcoloring function in the DMDA operations because
173    the basic DMDA does not know about matrices. We think of DMDA as being more
174    more low-level then matrices.
175   */
176   if (dim == 1) {
177     ierr = DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
178   } else if (dim == 2) {
179     ierr =  DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
180   } else if (dim == 3) {
181     ierr =  DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
182   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
183   if (isBAIJ) {
184     dd->w  = nc;
185     dd->xs = dd->xs*nc;
186     dd->xe = dd->xe*nc;
187     dd->Xs = dd->Xs*nc;
188     dd->Xe = dd->Xe*nc;
189   }
190   PetscFunctionReturn(0);
191 }
192 
193 /* ---------------------------------------------------------------------------------*/
194 
195 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
196 {
197   PetscErrorCode   ierr;
198   PetscInt         xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
199   PetscInt         ncolors;
200   MPI_Comm         comm;
201   DMBoundaryType bx,by;
202   DMDAStencilType  st;
203   ISColoringValue  *colors;
204   DM_DA            *dd = (DM_DA*)da->data;
205 
206   PetscFunctionBegin;
207   /*
208          nc - number of components per grid point
209          col - number of colors needed in one direction for single component problem
210 
211   */
212   ierr = DMDAGetInfo(da,&dim,&m,&n,0,&M,&N,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
213   col  = 2*s + 1;
214   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
215   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
216   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
217 
218   /* special case as taught to us by Paul Hovland */
219   if (st == DMDA_STENCIL_STAR && s == 1) {
220     ierr = DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
221   } else {
222 
223     if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X (%d) is divisible\n\
224                                                             by 2*stencil_width + 1 (%d)\n", m, col);
225     if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y (%d) is divisible\n\
226                                                             by 2*stencil_width + 1 (%d)\n", n, col);
227     if (ctype == IS_COLORING_GLOBAL) {
228       if (!dd->localcoloring) {
229         ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr);
230         ii   = 0;
231         for (j=ys; j<ys+ny; j++) {
232           for (i=xs; i<xs+nx; i++) {
233             for (k=0; k<nc; k++) {
234               colors[ii++] = k + nc*((i % col) + col*(j % col));
235             }
236           }
237         }
238         ncolors = nc + nc*(col-1 + col*(col-1));
239         ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
240       }
241       *coloring = dd->localcoloring;
242     } else if (ctype == IS_COLORING_LOCAL) {
243       if (!dd->ghostedcoloring) {
244         ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr);
245         ii   = 0;
246         for (j=gys; j<gys+gny; j++) {
247           for (i=gxs; i<gxs+gnx; i++) {
248             for (k=0; k<nc; k++) {
249               /* the complicated stuff is to handle periodic boundaries */
250               colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
251             }
252           }
253         }
254         ncolors = nc + nc*(col - 1 + col*(col-1));
255         ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
256         /* PetscIntView(ncolors,(PetscInt*)colors,0); */
257 
258         ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
259       }
260       *coloring = dd->ghostedcoloring;
261     } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
262   }
263   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
264   PetscFunctionReturn(0);
265 }
266 
267 /* ---------------------------------------------------------------------------------*/
268 
269 PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
270 {
271   PetscErrorCode   ierr;
272   PetscInt         xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
273   PetscInt         ncolors;
274   MPI_Comm         comm;
275   DMBoundaryType bx,by,bz;
276   DMDAStencilType  st;
277   ISColoringValue  *colors;
278   DM_DA            *dd = (DM_DA*)da->data;
279 
280   PetscFunctionBegin;
281   /*
282          nc - number of components per grid point
283          col - number of colors needed in one direction for single component problem
284 
285   */
286   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
287   col  = 2*s + 1;
288   if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
289                                                          by 2*stencil_width + 1\n");
290   if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
291                                                          by 2*stencil_width + 1\n");
292   if (bz == DM_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
293                                                          by 2*stencil_width + 1\n");
294 
295   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
296   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
297   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
298 
299   /* create the coloring */
300   if (ctype == IS_COLORING_GLOBAL) {
301     if (!dd->localcoloring) {
302       ierr = PetscMalloc1(nc*nx*ny*nz,&colors);CHKERRQ(ierr);
303       ii   = 0;
304       for (k=zs; k<zs+nz; k++) {
305         for (j=ys; j<ys+ny; j++) {
306           for (i=xs; i<xs+nx; i++) {
307             for (l=0; l<nc; l++) {
308               colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
309             }
310           }
311         }
312       }
313       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
314       ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
315     }
316     *coloring = dd->localcoloring;
317   } else if (ctype == IS_COLORING_LOCAL) {
318     if (!dd->ghostedcoloring) {
319       ierr = PetscMalloc1(nc*gnx*gny*gnz,&colors);CHKERRQ(ierr);
320       ii   = 0;
321       for (k=gzs; k<gzs+gnz; k++) {
322         for (j=gys; j<gys+gny; j++) {
323           for (i=gxs; i<gxs+gnx; i++) {
324             for (l=0; l<nc; l++) {
325               /* the complicated stuff is to handle periodic boundaries */
326               colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
327             }
328           }
329         }
330       }
331       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
332       ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
333       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
334     }
335     *coloring = dd->ghostedcoloring;
336   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
337   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
338   PetscFunctionReturn(0);
339 }
340 
341 /* ---------------------------------------------------------------------------------*/
342 
343 PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
344 {
345   PetscErrorCode   ierr;
346   PetscInt         xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
347   PetscInt         ncolors;
348   MPI_Comm         comm;
349   DMBoundaryType bx;
350   ISColoringValue  *colors;
351   DM_DA            *dd = (DM_DA*)da->data;
352 
353   PetscFunctionBegin;
354   /*
355          nc - number of components per grid point
356          col - number of colors needed in one direction for single component problem
357 
358   */
359   ierr = DMDAGetInfo(da,&dim,&m,0,0,&M,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
360   col  = 2*s + 1;
361 
362   if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points %d is divisible\n\
363                                                           by 2*stencil_width + 1 %d\n",(int)m,(int)col);
364 
365   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
366   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
367   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
368 
369   /* create the coloring */
370   if (ctype == IS_COLORING_GLOBAL) {
371     if (!dd->localcoloring) {
372       ierr = PetscMalloc1(nc*nx,&colors);CHKERRQ(ierr);
373       if (dd->ofillcols) {
374         PetscInt tc = 0;
375         for (i=0; i<nc; i++) tc += (PetscInt) (dd->ofillcols[i] > 0);
376         i1 = 0;
377         for (i=xs; i<xs+nx; i++) {
378           for (l=0; l<nc; l++) {
379             if (dd->ofillcols[l] && (i % col)) {
380               colors[i1++] =  nc - 1 + tc*((i % col) - 1) + dd->ofillcols[l];
381             } else {
382               colors[i1++] = l;
383             }
384           }
385         }
386         ncolors = nc + 2*s*tc;
387       } else {
388         i1 = 0;
389         for (i=xs; i<xs+nx; i++) {
390           for (l=0; l<nc; l++) {
391             colors[i1++] = l + nc*(i % col);
392           }
393         }
394         ncolors = nc + nc*(col-1);
395       }
396       ierr = ISColoringCreate(comm,ncolors,nc*nx,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
397     }
398     *coloring = dd->localcoloring;
399   } else if (ctype == IS_COLORING_LOCAL) {
400     if (!dd->ghostedcoloring) {
401       ierr = PetscMalloc1(nc*gnx,&colors);CHKERRQ(ierr);
402       i1   = 0;
403       for (i=gxs; i<gxs+gnx; i++) {
404         for (l=0; l<nc; l++) {
405           /* the complicated stuff is to handle periodic boundaries */
406           colors[i1++] = l + nc*(SetInRange(i,m) % col);
407         }
408       }
409       ncolors = nc + nc*(col-1);
410       ierr    = ISColoringCreate(comm,ncolors,nc*gnx,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
411       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
412     }
413     *coloring = dd->ghostedcoloring;
414   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
415   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
416   PetscFunctionReturn(0);
417 }
418 
419 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
420 {
421   PetscErrorCode   ierr;
422   PetscInt         xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
423   PetscInt         ncolors;
424   MPI_Comm         comm;
425   DMBoundaryType bx,by;
426   ISColoringValue  *colors;
427   DM_DA            *dd = (DM_DA*)da->data;
428 
429   PetscFunctionBegin;
430   /*
431          nc - number of components per grid point
432          col - number of colors needed in one direction for single component problem
433 
434   */
435   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,0);CHKERRQ(ierr);
436   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
437   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
438   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
439 
440   if (bx == DM_BOUNDARY_PERIODIC && (m % 5)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible by 5\n");
441   if (by == DM_BOUNDARY_PERIODIC && (n % 5)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible by 5\n");
442 
443   /* create the coloring */
444   if (ctype == IS_COLORING_GLOBAL) {
445     if (!dd->localcoloring) {
446       ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr);
447       ii   = 0;
448       for (j=ys; j<ys+ny; j++) {
449         for (i=xs; i<xs+nx; i++) {
450           for (k=0; k<nc; k++) {
451             colors[ii++] = k + nc*((3*j+i) % 5);
452           }
453         }
454       }
455       ncolors = 5*nc;
456       ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
457     }
458     *coloring = dd->localcoloring;
459   } else if (ctype == IS_COLORING_LOCAL) {
460     if (!dd->ghostedcoloring) {
461       ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr);
462       ii = 0;
463       for (j=gys; j<gys+gny; j++) {
464         for (i=gxs; i<gxs+gnx; i++) {
465           for (k=0; k<nc; k++) {
466             colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
467           }
468         }
469       }
470       ncolors = 5*nc;
471       ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
472       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
473     }
474     *coloring = dd->ghostedcoloring;
475   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
476   PetscFunctionReturn(0);
477 }
478 
479 /* =========================================================================== */
480 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat);
481 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM,Mat);
482 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat);
483 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat);
484 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat);
485 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat);
486 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat);
487 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat);
488 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat);
489 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat);
490 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM,Mat);
491 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM,Mat);
492 extern PetscErrorCode DMCreateMatrix_DA_IS(DM,Mat);
493 
494 /*@C
495    MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix
496 
497    Logically Collective on Mat
498 
499    Input Parameters:
500 +  mat - the matrix
501 -  da - the da
502 
503    Level: intermediate
504 
505 @*/
506 PetscErrorCode MatSetupDM(Mat mat,DM da)
507 {
508   PetscErrorCode ierr;
509 
510   PetscFunctionBegin;
511   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
512   PetscValidHeaderSpecificType(da,DM_CLASSID,1,DMDA);
513   ierr = PetscTryMethod(mat,"MatSetupDM_C",(Mat,DM),(mat,da));CHKERRQ(ierr);
514   PetscFunctionReturn(0);
515 }
516 
517 PetscErrorCode  MatView_MPI_DA(Mat A,PetscViewer viewer)
518 {
519   DM                da;
520   PetscErrorCode    ierr;
521   const char        *prefix;
522   Mat               Anatural;
523   AO                ao;
524   PetscInt          rstart,rend,*petsc,i;
525   IS                is;
526   MPI_Comm          comm;
527   PetscViewerFormat format;
528 
529   PetscFunctionBegin;
530   /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
531   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
532   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0);
533 
534   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
535   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
536   if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
537 
538   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
539   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
540   ierr = PetscMalloc1(rend-rstart,&petsc);CHKERRQ(ierr);
541   for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
542   ierr = AOApplicationToPetsc(ao,rend-rstart,petsc);CHKERRQ(ierr);
543   ierr = ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
544 
545   /* call viewer on natural ordering */
546   ierr = MatCreateSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);CHKERRQ(ierr);
547   ierr = ISDestroy(&is);CHKERRQ(ierr);
548   ierr = PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);CHKERRQ(ierr);
549   ierr = PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);CHKERRQ(ierr);
550   ierr = PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);CHKERRQ(ierr);
551   ierr = MatView(Anatural,viewer);CHKERRQ(ierr);
552   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
553   PetscFunctionReturn(0);
554 }
555 
556 PetscErrorCode  MatLoad_MPI_DA(Mat A,PetscViewer viewer)
557 {
558   DM             da;
559   PetscErrorCode ierr;
560   Mat            Anatural,Aapp;
561   AO             ao;
562   PetscInt       rstart,rend,*app,i,m,n,M,N;
563   IS             is;
564   MPI_Comm       comm;
565 
566   PetscFunctionBegin;
567   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
568   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
569   if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
570 
571   /* Load the matrix in natural ordering */
572   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Anatural);CHKERRQ(ierr);
573   ierr = MatSetType(Anatural,((PetscObject)A)->type_name);CHKERRQ(ierr);
574   ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr);
575   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
576   ierr = MatSetSizes(Anatural,m,n,M,N);CHKERRQ(ierr);
577   ierr = MatLoad(Anatural,viewer);CHKERRQ(ierr);
578 
579   /* Map natural ordering to application ordering and create IS */
580   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
581   ierr = MatGetOwnershipRange(Anatural,&rstart,&rend);CHKERRQ(ierr);
582   ierr = PetscMalloc1(rend-rstart,&app);CHKERRQ(ierr);
583   for (i=rstart; i<rend; i++) app[i-rstart] = i;
584   ierr = AOPetscToApplication(ao,rend-rstart,app);CHKERRQ(ierr);
585   ierr = ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
586 
587   /* Do permutation and replace header */
588   ierr = MatCreateSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);CHKERRQ(ierr);
589   ierr = MatHeaderReplace(A,&Aapp);CHKERRQ(ierr);
590   ierr = ISDestroy(&is);CHKERRQ(ierr);
591   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
592   PetscFunctionReturn(0);
593 }
594 
595 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
596 {
597   PetscErrorCode ierr;
598   PetscInt       dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
599   Mat            A;
600   MPI_Comm       comm;
601   MatType        Atype;
602   PetscSection   section, sectionGlobal;
603   void           (*aij)(void)=NULL,(*baij)(void)=NULL,(*sbaij)(void)=NULL,(*sell)(void)=NULL,(*is)(void)=NULL;
604   MatType        mtype;
605   PetscMPIInt    size;
606   DM_DA          *dd = (DM_DA*)da->data;
607 
608   PetscFunctionBegin;
609   ierr = MatInitializePackage();CHKERRQ(ierr);
610   mtype = da->mattype;
611 
612   ierr = DMGetDefaultSection(da, &section);CHKERRQ(ierr);
613   if (section) {
614     PetscInt  bs = -1;
615     PetscInt  localSize;
616     PetscBool isShell, isBlock, isSeqBlock, isMPIBlock, isSymBlock, isSymSeqBlock, isSymMPIBlock, isSymmetric;
617 
618     ierr = DMGetDefaultGlobalSection(da, &sectionGlobal);CHKERRQ(ierr);
619     ierr = PetscSectionGetConstrainedStorageSize(sectionGlobal, &localSize);CHKERRQ(ierr);
620     ierr = MatCreate(PetscObjectComm((PetscObject)da),&A);CHKERRQ(ierr);
621     ierr = MatSetSizes(A,localSize,localSize,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
622     ierr = MatSetType(A,mtype);CHKERRQ(ierr);
623     ierr = PetscStrcmp(mtype,MATSHELL,&isShell);CHKERRQ(ierr);
624     ierr = PetscStrcmp(mtype,MATBAIJ,&isBlock);CHKERRQ(ierr);
625     ierr = PetscStrcmp(mtype,MATSEQBAIJ,&isSeqBlock);CHKERRQ(ierr);
626     ierr = PetscStrcmp(mtype,MATMPIBAIJ,&isMPIBlock);CHKERRQ(ierr);
627     ierr = PetscStrcmp(mtype,MATSBAIJ,&isSymBlock);CHKERRQ(ierr);
628     ierr = PetscStrcmp(mtype,MATSEQSBAIJ,&isSymSeqBlock);CHKERRQ(ierr);
629     ierr = PetscStrcmp(mtype,MATMPISBAIJ,&isSymMPIBlock);CHKERRQ(ierr);
630     /* Check for symmetric storage */
631     isSymmetric = (PetscBool) (isSymBlock || isSymSeqBlock || isSymMPIBlock);
632     if (isSymmetric) {
633       ierr = MatSetOption(*J, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE);CHKERRQ(ierr);
634     }
635     if (!isShell) {
636       PetscInt *dnz, *onz, *dnzu, *onzu, bsLocal;
637 
638       if (bs < 0) {
639         if (isBlock || isSeqBlock || isMPIBlock || isSymBlock || isSymSeqBlock || isSymMPIBlock) {
640           PetscInt pStart, pEnd, p, dof;
641 
642           ierr = PetscSectionGetChart(sectionGlobal, &pStart, &pEnd);CHKERRQ(ierr);
643           for (p = pStart; p < pEnd; ++p) {
644             ierr = PetscSectionGetDof(sectionGlobal, p, &dof);CHKERRQ(ierr);
645             if (dof) {
646               bs = dof;
647               break;
648             }
649           }
650         } else {
651           bs = 1;
652         }
653         /* Must have same blocksize on all procs (some might have no points) */
654         bsLocal = bs;
655         ierr    = MPIU_Allreduce(&bsLocal, &bs, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)da));CHKERRQ(ierr);
656       }
657       ierr = PetscCalloc4(localSize/bs, &dnz, localSize/bs, &onz, localSize/bs, &dnzu, localSize/bs, &onzu);CHKERRQ(ierr);
658       /* ierr = DMPlexPreallocateOperator(dm, bs, section, sectionGlobal, dnz, onz, dnzu, onzu, *J, fillMatrix);CHKERRQ(ierr); */
659       ierr = PetscFree4(dnz, onz, dnzu, onzu);CHKERRQ(ierr);
660     }
661   }
662   /*
663                                   m
664           ------------------------------------------------------
665          |                                                     |
666          |                                                     |
667          |               ----------------------                |
668          |               |                    |                |
669       n  |           ny  |                    |                |
670          |               |                    |                |
671          |               .---------------------                |
672          |             (xs,ys)     nx                          |
673          |            .                                        |
674          |         (gxs,gys)                                   |
675          |                                                     |
676           -----------------------------------------------------
677   */
678 
679   /*
680          nc - number of components per grid point
681          col - number of colors needed in one direction for single component problem
682 
683   */
684   M   = dd->M;
685   N   = dd->N;
686   P   = dd->P;
687   dim = da->dim;
688   dof = dd->w;
689   /* ierr = DMDAGetInfo(da,&dim,&M,&N,&P,0,0,0,&dof,0,0,0,0,0);CHKERRQ(ierr); */
690   ierr = DMDAGetCorners(da,0,0,0,&nx,&ny,&nz);CHKERRQ(ierr);
691   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
692   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
693   ierr = MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);CHKERRQ(ierr);
694   ierr = MatSetType(A,mtype);CHKERRQ(ierr);
695   ierr = MatSetDM(A,da);CHKERRQ(ierr);
696   if (da->structure_only) {
697     ierr = MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE);CHKERRQ(ierr);
698   }
699   ierr = MatGetType(A,&Atype);CHKERRQ(ierr);
700   /*
701      We do not provide a getmatrix function in the DMDA operations because
702    the basic DMDA does not know about matrices. We think of DMDA as being more
703    more low-level than matrices. This is kind of cheating but, cause sometimes
704    we think of DMDA has higher level than matrices.
705 
706      We could switch based on Atype (or mtype), but we do not since the
707    specialized setting routines depend only the particular preallocation
708    details of the matrix, not the type itself.
709   */
710   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
711   if (!aij) {
712     ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
713   }
714   if (!aij) {
715     ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
716     if (!baij) {
717       ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
718     }
719     if (!baij) {
720       ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
721       if (!sbaij) {
722         ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
723       }
724       if (!sbaij) {
725         ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISELLSetPreallocation_C",&sell);CHKERRQ(ierr);
726         if (!sell) {
727           ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",&sell);CHKERRQ(ierr);
728         }
729       }
730       if (!sell) {
731         ierr = PetscObjectQueryFunction((PetscObject)A,"MatISSetPreallocation_C",&is);CHKERRQ(ierr);
732       }
733     }
734   }
735   if (aij) {
736     if (dim == 1) {
737       if (dd->ofill) {
738         ierr = DMCreateMatrix_DA_1d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
739       } else {
740         ierr = DMCreateMatrix_DA_1d_MPIAIJ(da,A);CHKERRQ(ierr);
741       }
742     } else if (dim == 2) {
743       if (dd->ofill) {
744         ierr = DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
745       } else {
746         ierr = DMCreateMatrix_DA_2d_MPIAIJ(da,A);CHKERRQ(ierr);
747       }
748     } else if (dim == 3) {
749       if (dd->ofill) {
750         ierr = DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
751       } else {
752         ierr = DMCreateMatrix_DA_3d_MPIAIJ(da,A);CHKERRQ(ierr);
753       }
754     }
755   } else if (baij) {
756     if (dim == 2) {
757       ierr = DMCreateMatrix_DA_2d_MPIBAIJ(da,A);CHKERRQ(ierr);
758     } else if (dim == 3) {
759       ierr = DMCreateMatrix_DA_3d_MPIBAIJ(da,A);CHKERRQ(ierr);
760     } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
761   } else if (sbaij) {
762     if (dim == 2) {
763       ierr = DMCreateMatrix_DA_2d_MPISBAIJ(da,A);CHKERRQ(ierr);
764     } else if (dim == 3) {
765       ierr = DMCreateMatrix_DA_3d_MPISBAIJ(da,A);CHKERRQ(ierr);
766     } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
767   } else if (sell) {
768      if (dim == 2) {
769        ierr = DMCreateMatrix_DA_2d_MPISELL(da,A);CHKERRQ(ierr);
770      } else if (dim == 3) {
771        ierr = DMCreateMatrix_DA_3d_MPISELL(da,A);CHKERRQ(ierr);
772      } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
773   } else if (is) {
774     ierr = DMCreateMatrix_DA_IS(da,A);CHKERRQ(ierr);
775   } else {
776     ISLocalToGlobalMapping ltog;
777 
778     ierr = MatSetBlockSize(A,dof);CHKERRQ(ierr);
779     ierr = MatSetUp(A);CHKERRQ(ierr);
780     ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
781     ierr = MatSetLocalToGlobalMapping(A,ltog,ltog);CHKERRQ(ierr);
782   }
783   ierr = DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);CHKERRQ(ierr);
784   ierr = MatSetStencil(A,dim,dims,starts,dof);CHKERRQ(ierr);
785   ierr = MatSetDM(A,da);CHKERRQ(ierr);
786   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
787   if (size > 1) {
788     /* change viewer to display matrix in natural ordering */
789     ierr = MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA);CHKERRQ(ierr);
790     ierr = MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA);CHKERRQ(ierr);
791   }
792   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
793   *J = A;
794   PetscFunctionReturn(0);
795 }
796 
797 /* ---------------------------------------------------------------------------------*/
798 PetscErrorCode DMCreateMatrix_DA_IS(DM dm,Mat J)
799 {
800   DM_DA                  *da = (DM_DA*)dm->data;
801   Mat                    lJ;
802   ISLocalToGlobalMapping ltog;
803   IS                     is_loc_filt, is_glob;
804   const PetscInt         *e_loc,*idx;
805   PetscInt               i,nel,nen,dnz,nv,dof,dim,*gidx,nb;
806   PetscErrorCode         ierr;
807 
808   /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
809      We need to filter the local indices that are represented through the DMDAGetElements decomposition
810      This is because the size of the local matrices in MATIS is the local size of the l2g map */
811   PetscFunctionBegin;
812   dof  = da->w;
813   dim  = dm->dim;
814 
815   ierr = MatSetBlockSize(J,dof);CHKERRQ(ierr);
816 
817   /* get local elements indices in local DMDA numbering */
818   ierr = DMDAGetElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
819   ierr = ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nel*nen,e_loc,PETSC_COPY_VALUES,&is_loc_filt);CHKERRQ(ierr);
820   ierr = DMDARestoreElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr);
821 
822   /* obtain a consistent local ordering for MATIS */
823   ierr = ISSortRemoveDups(is_loc_filt);CHKERRQ(ierr);
824   ierr = ISBlockGetLocalSize(is_loc_filt,&nb);CHKERRQ(ierr);
825   ierr = DMGetLocalToGlobalMapping(dm,&ltog);CHKERRQ(ierr);
826   ierr = ISLocalToGlobalMappingGetSize(ltog,&nv);CHKERRQ(ierr);
827   ierr = PetscMalloc1(PetscMax(nb,nv/dof),&gidx);CHKERRQ(ierr);
828   ierr = ISBlockGetIndices(is_loc_filt,&idx);CHKERRQ(ierr);
829   ierr = ISLocalToGlobalMappingApplyBlock(ltog,nb,idx,gidx);CHKERRQ(ierr);
830   ierr = ISBlockRestoreIndices(is_loc_filt,&idx);CHKERRQ(ierr);
831   ierr = ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nb,gidx,PETSC_USE_POINTER,&is_glob);CHKERRQ(ierr);
832   ierr = ISLocalToGlobalMappingCreateIS(is_glob,&ltog);CHKERRQ(ierr);
833   ierr = ISDestroy(&is_glob);CHKERRQ(ierr);
834   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
835   ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
836 
837   /* We also attach a l2g map to the local matrices to have MatSetValueLocal to work */
838   ierr = MatISGetLocalMat(J,&lJ);CHKERRQ(ierr);
839   ierr = ISLocalToGlobalMappingCreateIS(is_loc_filt,&ltog);CHKERRQ(ierr);
840   ierr = ISDestroy(&is_loc_filt);CHKERRQ(ierr);
841   ierr = ISCreateStride(PetscObjectComm((PetscObject)lJ),nv/dof,0,1,&is_glob);CHKERRQ(ierr);
842   ierr = ISGetIndices(is_glob,&idx);CHKERRQ(ierr);
843   ierr = ISGlobalToLocalMappingApplyBlock(ltog,IS_GTOLM_MASK,nv/dof,idx,&nb,gidx);CHKERRQ(ierr);
844   ierr = ISRestoreIndices(is_glob,&idx);CHKERRQ(ierr);
845   ierr = ISDestroy(&is_glob);CHKERRQ(ierr);
846   ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
847   ierr = ISCreateBlock(PETSC_COMM_SELF,dof,nb,gidx,PETSC_USE_POINTER,&is_loc_filt);CHKERRQ(ierr);
848   ierr = ISLocalToGlobalMappingCreateIS(is_loc_filt,&ltog);CHKERRQ(ierr);
849   ierr = ISDestroy(&is_loc_filt);CHKERRQ(ierr);
850   ierr = MatSetLocalToGlobalMapping(lJ,ltog,ltog);CHKERRQ(ierr);
851   ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
852   ierr = PetscFree(gidx);CHKERRQ(ierr);
853 
854   /* Preallocation (not exact) */
855   switch (da->elementtype) {
856   case DMDA_ELEMENT_P1:
857   case DMDA_ELEMENT_Q1:
858     dnz = 1;
859     for (i=0; i<dim; i++) dnz *= 3;
860     dnz *= dof;
861     break;
862   default:
863     SETERRQ1(PetscObjectComm((PetscObject)dm),PETSC_ERR_SUP,"Unhandled element type %d",da->elementtype);
864     break;
865   }
866   ierr = MatSeqAIJSetPreallocation(lJ,dnz,NULL);CHKERRQ(ierr);
867   ierr = MatSeqBAIJSetPreallocation(lJ,dof,dnz/dof,NULL);CHKERRQ(ierr);
868   ierr = MatSeqSBAIJSetPreallocation(lJ,dof,dnz/dof,NULL);CHKERRQ(ierr);
869   ierr = MatISRestoreLocalMat(J,&lJ);CHKERRQ(ierr);
870   PetscFunctionReturn(0);
871 }
872 
873 PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da,Mat J)
874 {
875   PetscErrorCode         ierr;
876   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p;
877   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
878   MPI_Comm               comm;
879   PetscScalar            *values;
880   DMBoundaryType         bx,by;
881   ISLocalToGlobalMapping ltog;
882   DMDAStencilType        st;
883 
884   PetscFunctionBegin;
885   /*
886          nc - number of components per grid point
887          col - number of colors needed in one direction for single component problem
888 
889   */
890   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
891   col  = 2*s + 1;
892   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
893   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
894   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
895 
896   ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr);
897   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
898 
899   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
900   /* determine the matrix preallocation information */
901   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
902   for (i=xs; i<xs+nx; i++) {
903 
904     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
905     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
906 
907     for (j=ys; j<ys+ny; j++) {
908       slot = i - gxs + gnx*(j - gys);
909 
910       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
911       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
912 
913       cnt = 0;
914       for (k=0; k<nc; k++) {
915         for (l=lstart; l<lend+1; l++) {
916           for (p=pstart; p<pend+1; p++) {
917             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
918               cols[cnt++] = k + nc*(slot + gnx*l + p);
919             }
920           }
921         }
922         rows[k] = k + nc*(slot);
923       }
924       ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
925     }
926   }
927   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
928   ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr);
929   ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
930   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
931 
932   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
933 
934   /*
935     For each node in the grid: we get the neighbors in the local (on processor ordering
936     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
937     PETSc ordering.
938   */
939   if (!da->prealloc_only) {
940     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
941     for (i=xs; i<xs+nx; i++) {
942 
943       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
944       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
945 
946       for (j=ys; j<ys+ny; j++) {
947         slot = i - gxs + gnx*(j - gys);
948 
949         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
950         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
951 
952         cnt = 0;
953         for (k=0; k<nc; k++) {
954           for (l=lstart; l<lend+1; l++) {
955             for (p=pstart; p<pend+1; p++) {
956               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
957                 cols[cnt++] = k + nc*(slot + gnx*l + p);
958               }
959             }
960           }
961           rows[k] = k + nc*(slot);
962         }
963         ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
964       }
965     }
966     ierr = PetscFree(values);CHKERRQ(ierr);
967     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
968     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
969     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
970   }
971   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
972   PetscFunctionReturn(0);
973 }
974 
975 PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da,Mat J)
976 {
977   PetscErrorCode         ierr;
978   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
979   PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
980   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
981   MPI_Comm               comm;
982   PetscScalar            *values;
983   DMBoundaryType         bx,by,bz;
984   ISLocalToGlobalMapping ltog;
985   DMDAStencilType        st;
986 
987   PetscFunctionBegin;
988   /*
989          nc - number of components per grid point
990          col - number of colors needed in one direction for single component problem
991 
992   */
993   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
994   col  = 2*s + 1;
995   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
996   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
997   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
998 
999   ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr);
1000   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1001 
1002   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1003   /* determine the matrix preallocation information */
1004   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1005   for (i=xs; i<xs+nx; i++) {
1006     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1007     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1008     for (j=ys; j<ys+ny; j++) {
1009       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1010       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1011       for (k=zs; k<zs+nz; k++) {
1012         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1013         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1014 
1015         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1016 
1017         cnt = 0;
1018         for (l=0; l<nc; l++) {
1019           for (ii=istart; ii<iend+1; ii++) {
1020             for (jj=jstart; jj<jend+1; jj++) {
1021               for (kk=kstart; kk<kend+1; kk++) {
1022                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1023                   cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1024                 }
1025               }
1026             }
1027           }
1028           rows[l] = l + nc*(slot);
1029         }
1030         ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1031       }
1032     }
1033   }
1034   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1035   ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1036   ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1037   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1038   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1039 
1040   /*
1041     For each node in the grid: we get the neighbors in the local (on processor ordering
1042     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1043     PETSc ordering.
1044   */
1045   if (!da->prealloc_only) {
1046     ierr = PetscCalloc1(col*col*col*nc*nc*nc,&values);CHKERRQ(ierr);
1047     for (i=xs; i<xs+nx; i++) {
1048       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1049       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1050       for (j=ys; j<ys+ny; j++) {
1051         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1052         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1053         for (k=zs; k<zs+nz; k++) {
1054           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1055           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1056 
1057           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1058 
1059           cnt = 0;
1060           for (l=0; l<nc; l++) {
1061             for (ii=istart; ii<iend+1; ii++) {
1062               for (jj=jstart; jj<jend+1; jj++) {
1063                 for (kk=kstart; kk<kend+1; kk++) {
1064                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1065                     cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1066                   }
1067                 }
1068               }
1069             }
1070             rows[l] = l + nc*(slot);
1071           }
1072           ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1073         }
1074       }
1075     }
1076     ierr = PetscFree(values);CHKERRQ(ierr);
1077     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1078     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1079     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1080   }
1081   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1082   PetscFunctionReturn(0);
1083 }
1084 
1085 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J)
1086 {
1087   PetscErrorCode         ierr;
1088   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,M,N;
1089   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
1090   MPI_Comm               comm;
1091   PetscScalar            *values;
1092   DMBoundaryType         bx,by;
1093   ISLocalToGlobalMapping ltog;
1094   DMDAStencilType        st;
1095   PetscBool              removedups = PETSC_FALSE;
1096 
1097   PetscFunctionBegin;
1098   /*
1099          nc - number of components per grid point
1100          col - number of colors needed in one direction for single component problem
1101 
1102   */
1103   ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1104   col  = 2*s + 1;
1105   /*
1106        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1107        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1108   */
1109   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1110   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1111   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1112   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1113   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1114 
1115   ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr);
1116   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1117 
1118   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1119   /* determine the matrix preallocation information */
1120   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
1121   for (i=xs; i<xs+nx; i++) {
1122 
1123     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1124     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1125 
1126     for (j=ys; j<ys+ny; j++) {
1127       slot = i - gxs + gnx*(j - gys);
1128 
1129       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1130       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1131 
1132       cnt = 0;
1133       for (k=0; k<nc; k++) {
1134         for (l=lstart; l<lend+1; l++) {
1135           for (p=pstart; p<pend+1; p++) {
1136             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1137               cols[cnt++] = k + nc*(slot + gnx*l + p);
1138             }
1139           }
1140         }
1141         rows[k] = k + nc*(slot);
1142       }
1143       if (removedups) {
1144         ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1145       } else {
1146         ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1147       }
1148     }
1149   }
1150   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1151   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1152   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1153   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1154 
1155   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1156 
1157   /*
1158     For each node in the grid: we get the neighbors in the local (on processor ordering
1159     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1160     PETSc ordering.
1161   */
1162   if (!da->prealloc_only) {
1163     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
1164     for (i=xs; i<xs+nx; i++) {
1165 
1166       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1167       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1168 
1169       for (j=ys; j<ys+ny; j++) {
1170         slot = i - gxs + gnx*(j - gys);
1171 
1172         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1173         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1174 
1175         cnt = 0;
1176         for (k=0; k<nc; k++) {
1177           for (l=lstart; l<lend+1; l++) {
1178             for (p=pstart; p<pend+1; p++) {
1179               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1180                 cols[cnt++] = k + nc*(slot + gnx*l + p);
1181               }
1182             }
1183           }
1184           rows[k] = k + nc*(slot);
1185         }
1186         ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1187       }
1188     }
1189     ierr = PetscFree(values);CHKERRQ(ierr);
1190     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1191     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1192     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1193   }
1194   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1195   PetscFunctionReturn(0);
1196 }
1197 
1198 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)
1199 {
1200   PetscErrorCode         ierr;
1201   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1202   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,maxcnt = 0,l,p,M,N;
1203   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
1204   DM_DA                  *dd = (DM_DA*)da->data;
1205   PetscInt               ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
1206   MPI_Comm               comm;
1207   PetscScalar            *values;
1208   DMBoundaryType         bx,by;
1209   ISLocalToGlobalMapping ltog;
1210   DMDAStencilType        st;
1211   PetscBool              removedups = PETSC_FALSE;
1212 
1213   PetscFunctionBegin;
1214   /*
1215          nc - number of components per grid point
1216          col - number of colors needed in one direction for single component problem
1217 
1218   */
1219   ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1220   col  = 2*s + 1;
1221   /*
1222        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1223        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1224   */
1225   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1226   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1227   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1228   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1229   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1230 
1231   ierr = PetscMalloc1(col*col*nc,&cols);CHKERRQ(ierr);
1232   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1233 
1234   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1235   /* determine the matrix preallocation information */
1236   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
1237   for (i=xs; i<xs+nx; i++) {
1238 
1239     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1240     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1241 
1242     for (j=ys; j<ys+ny; j++) {
1243       slot = i - gxs + gnx*(j - gys);
1244 
1245       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1246       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1247 
1248       for (k=0; k<nc; k++) {
1249         cnt = 0;
1250         for (l=lstart; l<lend+1; l++) {
1251           for (p=pstart; p<pend+1; p++) {
1252             if (l || p) {
1253               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
1254                 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
1255               }
1256             } else {
1257               if (dfill) {
1258                 for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1259               } else {
1260                 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1261               }
1262             }
1263           }
1264         }
1265         row    = k + nc*(slot);
1266         maxcnt = PetscMax(maxcnt,cnt);
1267         if (removedups) {
1268           ierr   = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1269         } else {
1270           ierr   = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1271         }
1272       }
1273     }
1274   }
1275   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1276   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1277   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1278   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1279 
1280   /*
1281     For each node in the grid: we get the neighbors in the local (on processor ordering
1282     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1283     PETSc ordering.
1284   */
1285   if (!da->prealloc_only) {
1286     ierr = PetscCalloc1(maxcnt,&values);CHKERRQ(ierr);
1287     for (i=xs; i<xs+nx; i++) {
1288 
1289       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1290       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1291 
1292       for (j=ys; j<ys+ny; j++) {
1293         slot = i - gxs + gnx*(j - gys);
1294 
1295         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1296         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1297 
1298         for (k=0; k<nc; k++) {
1299           cnt = 0;
1300           for (l=lstart; l<lend+1; l++) {
1301             for (p=pstart; p<pend+1; p++) {
1302               if (l || p) {
1303                 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
1304                   for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
1305                 }
1306               } else {
1307                 if (dfill) {
1308                   for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1309                 } else {
1310                   for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1311                 }
1312               }
1313             }
1314           }
1315           row  = k + nc*(slot);
1316           ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1317         }
1318       }
1319     }
1320     ierr = PetscFree(values);CHKERRQ(ierr);
1321     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1322     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1323     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1324   }
1325   ierr = PetscFree(cols);CHKERRQ(ierr);
1326   PetscFunctionReturn(0);
1327 }
1328 
1329 /* ---------------------------------------------------------------------------------*/
1330 
1331 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J)
1332 {
1333   PetscErrorCode         ierr;
1334   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1335   PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
1336   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
1337   MPI_Comm               comm;
1338   PetscScalar            *values;
1339   DMBoundaryType         bx,by,bz;
1340   ISLocalToGlobalMapping ltog;
1341   DMDAStencilType        st;
1342   PetscBool              removedups = PETSC_FALSE;
1343 
1344   PetscFunctionBegin;
1345   /*
1346          nc - number of components per grid point
1347          col - number of colors needed in one direction for single component problem
1348 
1349   */
1350   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1351   col  = 2*s + 1;
1352 
1353   /*
1354        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1355        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1356   */
1357   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1358   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1359   if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;
1360 
1361   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1362   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1363   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1364 
1365   ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr);
1366   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1367 
1368   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1369   /* determine the matrix preallocation information */
1370   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1371   for (i=xs; i<xs+nx; i++) {
1372     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1373     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1374     for (j=ys; j<ys+ny; j++) {
1375       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1376       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1377       for (k=zs; k<zs+nz; k++) {
1378         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1379         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1380 
1381         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1382 
1383         cnt = 0;
1384         for (l=0; l<nc; l++) {
1385           for (ii=istart; ii<iend+1; ii++) {
1386             for (jj=jstart; jj<jend+1; jj++) {
1387               for (kk=kstart; kk<kend+1; kk++) {
1388                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1389                   cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1390                 }
1391               }
1392             }
1393           }
1394           rows[l] = l + nc*(slot);
1395         }
1396         if (removedups) {
1397           ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1398         } else {
1399           ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1400         }
1401       }
1402     }
1403   }
1404   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1405   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1406   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1407   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1408   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1409 
1410   /*
1411     For each node in the grid: we get the neighbors in the local (on processor ordering
1412     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1413     PETSc ordering.
1414   */
1415   if (!da->prealloc_only) {
1416     ierr = PetscCalloc1(col*col*col*nc*nc*nc,&values);CHKERRQ(ierr);
1417     for (i=xs; i<xs+nx; i++) {
1418       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1419       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1420       for (j=ys; j<ys+ny; j++) {
1421         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1422         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1423         for (k=zs; k<zs+nz; k++) {
1424           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1425           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1426 
1427           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1428 
1429           cnt = 0;
1430           for (l=0; l<nc; l++) {
1431             for (ii=istart; ii<iend+1; ii++) {
1432               for (jj=jstart; jj<jend+1; jj++) {
1433                 for (kk=kstart; kk<kend+1; kk++) {
1434                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1435                     cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1436                   }
1437                 }
1438               }
1439             }
1440             rows[l] = l + nc*(slot);
1441           }
1442           ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1443         }
1444       }
1445     }
1446     ierr = PetscFree(values);CHKERRQ(ierr);
1447     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1448     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1449     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1450   }
1451   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1452   PetscFunctionReturn(0);
1453 }
1454 
1455 /* ---------------------------------------------------------------------------------*/
1456 
1457 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J)
1458 {
1459   PetscErrorCode         ierr;
1460   DM_DA                  *dd = (DM_DA*)da->data;
1461   PetscInt               xs,nx,i,j,gxs,gnx,row,k,l;
1462   PetscInt               m,dim,s,*cols = NULL,nc,cnt,maxcnt = 0,*ocols;
1463   PetscInt               *ofill = dd->ofill,*dfill = dd->dfill;
1464   PetscScalar            *values;
1465   DMBoundaryType         bx;
1466   ISLocalToGlobalMapping ltog;
1467   PetscMPIInt            rank,size;
1468 
1469   PetscFunctionBegin;
1470   if (dd->bx == DM_BOUNDARY_PERIODIC) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"With fill provided not implemented with periodic boundary conditions");
1471   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)da),&rank);CHKERRQ(ierr);
1472   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRQ(ierr);
1473 
1474   /*
1475          nc - number of components per grid point
1476 
1477   */
1478   ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
1479   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
1480   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
1481 
1482   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1483   ierr = PetscCalloc2(nx*nc,&cols,nx*nc,&ocols);CHKERRQ(ierr);
1484 
1485   /*
1486         note should be smaller for first and last process with no periodic
1487         does not handle dfill
1488   */
1489   cnt = 0;
1490   /* coupling with process to the left */
1491   for (i=0; i<s; i++) {
1492     for (j=0; j<nc; j++) {
1493       ocols[cnt] = ((!rank) ? 0 : (s - i)*(ofill[j+1] - ofill[j]));
1494       cols[cnt]  = dfill[j+1] - dfill[j] + (s + i)*(ofill[j+1] - ofill[j]);
1495       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1496       cnt++;
1497     }
1498   }
1499   for (i=s; i<nx-s; i++) {
1500     for (j=0; j<nc; j++) {
1501       cols[cnt] = dfill[j+1] - dfill[j] + 2*s*(ofill[j+1] - ofill[j]);
1502       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1503       cnt++;
1504     }
1505   }
1506   /* coupling with process to the right */
1507   for (i=nx-s; i<nx; i++) {
1508     for (j=0; j<nc; j++) {
1509       ocols[cnt] = ((rank == (size-1)) ? 0 : (i - nx + s + 1)*(ofill[j+1] - ofill[j]));
1510       cols[cnt]  = dfill[j+1] - dfill[j] + (s + nx - i - 1)*(ofill[j+1] - ofill[j]);
1511       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1512       cnt++;
1513     }
1514   }
1515 
1516   ierr = MatSeqAIJSetPreallocation(J,0,cols);CHKERRQ(ierr);
1517   ierr = MatMPIAIJSetPreallocation(J,0,cols,0,ocols);CHKERRQ(ierr);
1518   ierr = PetscFree2(cols,ocols);CHKERRQ(ierr);
1519 
1520   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1521   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1522 
1523   /*
1524     For each node in the grid: we get the neighbors in the local (on processor ordering
1525     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1526     PETSc ordering.
1527   */
1528   if (!da->prealloc_only) {
1529     ierr = PetscCalloc2(maxcnt,&values,maxcnt,&cols);CHKERRQ(ierr);
1530 
1531     row = xs*nc;
1532     /* coupling with process to the left */
1533     for (i=xs; i<xs+s; i++) {
1534       for (j=0; j<nc; j++) {
1535         cnt = 0;
1536         if (rank) {
1537           for (l=0; l<s; l++) {
1538             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1539           }
1540         }
1541         if (dfill) {
1542           for (k=dfill[j]; k<dfill[j+1]; k++) {
1543             cols[cnt++] = i*nc + dfill[k];
1544           }
1545         } else {
1546           for (k=0; k<nc; k++) {
1547             cols[cnt++] = i*nc + k;
1548           }
1549         }
1550         for (l=0; l<s; l++) {
1551           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1552         }
1553         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1554         row++;
1555       }
1556     }
1557     for (i=xs+s; i<xs+nx-s; i++) {
1558       for (j=0; j<nc; j++) {
1559         cnt = 0;
1560         for (l=0; l<s; l++) {
1561           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1562         }
1563         if (dfill) {
1564           for (k=dfill[j]; k<dfill[j+1]; k++) {
1565             cols[cnt++] = i*nc + dfill[k];
1566           }
1567         } else {
1568           for (k=0; k<nc; k++) {
1569             cols[cnt++] = i*nc + k;
1570           }
1571         }
1572         for (l=0; l<s; l++) {
1573           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1574         }
1575         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1576         row++;
1577       }
1578     }
1579     /* coupling with process to the right */
1580     for (i=xs+nx-s; i<xs+nx; i++) {
1581       for (j=0; j<nc; j++) {
1582         cnt = 0;
1583         for (l=0; l<s; l++) {
1584           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1585         }
1586         if (dfill) {
1587           for (k=dfill[j]; k<dfill[j+1]; k++) {
1588             cols[cnt++] = i*nc + dfill[k];
1589           }
1590         } else {
1591           for (k=0; k<nc; k++) {
1592             cols[cnt++] = i*nc + k;
1593           }
1594         }
1595         if (rank < size-1) {
1596           for (l=0; l<s; l++) {
1597             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1598           }
1599         }
1600         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1601         row++;
1602       }
1603     }
1604     ierr = PetscFree2(values,cols);CHKERRQ(ierr);
1605     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1606     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1607     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1608   }
1609   PetscFunctionReturn(0);
1610 }
1611 
1612 /* ---------------------------------------------------------------------------------*/
1613 
1614 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J)
1615 {
1616   PetscErrorCode         ierr;
1617   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1618   PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
1619   PetscInt               istart,iend;
1620   PetscScalar            *values;
1621   DMBoundaryType         bx;
1622   ISLocalToGlobalMapping ltog;
1623 
1624   PetscFunctionBegin;
1625   /*
1626          nc - number of components per grid point
1627          col - number of colors needed in one direction for single component problem
1628 
1629   */
1630   ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
1631   col  = 2*s + 1;
1632 
1633   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
1634   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
1635 
1636   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1637   ierr = MatSeqAIJSetPreallocation(J,col*nc,0);CHKERRQ(ierr);
1638   ierr = MatMPIAIJSetPreallocation(J,col*nc,0,col*nc,0);CHKERRQ(ierr);
1639 
1640   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1641   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1642 
1643   /*
1644     For each node in the grid: we get the neighbors in the local (on processor ordering
1645     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1646     PETSc ordering.
1647   */
1648   if (!da->prealloc_only) {
1649     ierr = PetscMalloc2(nc,&rows,col*nc*nc,&cols);CHKERRQ(ierr);
1650     ierr = PetscCalloc1(col*nc*nc,&values);CHKERRQ(ierr);
1651     for (i=xs; i<xs+nx; i++) {
1652       istart = PetscMax(-s,gxs - i);
1653       iend   = PetscMin(s,gxs + gnx - i - 1);
1654       slot   = i - gxs;
1655 
1656       cnt = 0;
1657       for (l=0; l<nc; l++) {
1658         for (i1=istart; i1<iend+1; i1++) {
1659           cols[cnt++] = l + nc*(slot + i1);
1660         }
1661         rows[l] = l + nc*(slot);
1662       }
1663       ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1664     }
1665     ierr = PetscFree(values);CHKERRQ(ierr);
1666     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1667     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1668     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1669     ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1670   }
1671   PetscFunctionReturn(0);
1672 }
1673 
1674 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
1675 {
1676   PetscErrorCode         ierr;
1677   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1678   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1679   PetscInt               istart,iend,jstart,jend,ii,jj;
1680   MPI_Comm               comm;
1681   PetscScalar            *values;
1682   DMBoundaryType         bx,by;
1683   DMDAStencilType        st;
1684   ISLocalToGlobalMapping ltog;
1685 
1686   PetscFunctionBegin;
1687   /*
1688      nc - number of components per grid point
1689      col - number of colors needed in one direction for single component problem
1690   */
1691   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1692   col  = 2*s + 1;
1693 
1694   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1695   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1696   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1697 
1698   ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr);
1699 
1700   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1701 
1702   /* determine the matrix preallocation information */
1703   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1704   for (i=xs; i<xs+nx; i++) {
1705     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1706     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1707     for (j=ys; j<ys+ny; j++) {
1708       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1709       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1710       slot   = i - gxs + gnx*(j - gys);
1711 
1712       /* Find block columns in block row */
1713       cnt = 0;
1714       for (ii=istart; ii<iend+1; ii++) {
1715         for (jj=jstart; jj<jend+1; jj++) {
1716           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1717             cols[cnt++] = slot + ii + gnx*jj;
1718           }
1719         }
1720       }
1721       ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1722     }
1723   }
1724   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1725   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1726   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1727 
1728   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1729 
1730   /*
1731     For each node in the grid: we get the neighbors in the local (on processor ordering
1732     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1733     PETSc ordering.
1734   */
1735   if (!da->prealloc_only) {
1736     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
1737     for (i=xs; i<xs+nx; i++) {
1738       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1739       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1740       for (j=ys; j<ys+ny; j++) {
1741         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1742         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1743         slot = i - gxs + gnx*(j - gys);
1744         cnt  = 0;
1745         for (ii=istart; ii<iend+1; ii++) {
1746           for (jj=jstart; jj<jend+1; jj++) {
1747             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1748               cols[cnt++] = slot + ii + gnx*jj;
1749             }
1750           }
1751         }
1752         ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1753       }
1754     }
1755     ierr = PetscFree(values);CHKERRQ(ierr);
1756     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1757     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1758     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1759   }
1760   ierr = PetscFree(cols);CHKERRQ(ierr);
1761   PetscFunctionReturn(0);
1762 }
1763 
1764 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
1765 {
1766   PetscErrorCode         ierr;
1767   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1768   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1769   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1770   MPI_Comm               comm;
1771   PetscScalar            *values;
1772   DMBoundaryType         bx,by,bz;
1773   DMDAStencilType        st;
1774   ISLocalToGlobalMapping ltog;
1775 
1776   PetscFunctionBegin;
1777   /*
1778          nc - number of components per grid point
1779          col - number of colors needed in one direction for single component problem
1780 
1781   */
1782   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1783   col  = 2*s + 1;
1784 
1785   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1786   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1787   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1788 
1789   ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr);
1790 
1791   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1792 
1793   /* determine the matrix preallocation information */
1794   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1795   for (i=xs; i<xs+nx; i++) {
1796     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1797     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1798     for (j=ys; j<ys+ny; j++) {
1799       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1800       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1801       for (k=zs; k<zs+nz; k++) {
1802         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1803         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1804 
1805         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1806 
1807         /* Find block columns in block row */
1808         cnt = 0;
1809         for (ii=istart; ii<iend+1; ii++) {
1810           for (jj=jstart; jj<jend+1; jj++) {
1811             for (kk=kstart; kk<kend+1; kk++) {
1812               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1813                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1814               }
1815             }
1816           }
1817         }
1818         ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1819       }
1820     }
1821   }
1822   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1823   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1824   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1825 
1826   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1827 
1828   /*
1829     For each node in the grid: we get the neighbors in the local (on processor ordering
1830     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1831     PETSc ordering.
1832   */
1833   if (!da->prealloc_only) {
1834     ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr);
1835     for (i=xs; i<xs+nx; i++) {
1836       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1837       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1838       for (j=ys; j<ys+ny; j++) {
1839         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1840         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1841         for (k=zs; k<zs+nz; k++) {
1842           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1843           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1844 
1845           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1846 
1847           cnt = 0;
1848           for (ii=istart; ii<iend+1; ii++) {
1849             for (jj=jstart; jj<jend+1; jj++) {
1850               for (kk=kstart; kk<kend+1; kk++) {
1851                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1852                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1853                 }
1854               }
1855             }
1856           }
1857           ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1858         }
1859       }
1860     }
1861     ierr = PetscFree(values);CHKERRQ(ierr);
1862     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1863     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1864     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1865   }
1866   ierr = PetscFree(cols);CHKERRQ(ierr);
1867   PetscFunctionReturn(0);
1868 }
1869 
1870 /*
1871   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
1872   identify in the local ordering with periodic domain.
1873 */
1874 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[])
1875 {
1876   PetscErrorCode ierr;
1877   PetscInt       i,n;
1878 
1879   PetscFunctionBegin;
1880   ierr = ISLocalToGlobalMappingApplyBlock(ltog,1,row,row);CHKERRQ(ierr);
1881   ierr = ISLocalToGlobalMappingApplyBlock(ltog,*cnt,col,col);CHKERRQ(ierr);
1882   for (i=0,n=0; i<*cnt; i++) {
1883     if (col[i] >= *row) col[n++] = col[i];
1884   }
1885   *cnt = n;
1886   PetscFunctionReturn(0);
1887 }
1888 
1889 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J)
1890 {
1891   PetscErrorCode         ierr;
1892   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1893   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1894   PetscInt               istart,iend,jstart,jend,ii,jj;
1895   MPI_Comm               comm;
1896   PetscScalar            *values;
1897   DMBoundaryType         bx,by;
1898   DMDAStencilType        st;
1899   ISLocalToGlobalMapping ltog;
1900 
1901   PetscFunctionBegin;
1902   /*
1903      nc - number of components per grid point
1904      col - number of colors needed in one direction for single component problem
1905   */
1906   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1907   col  = 2*s + 1;
1908 
1909   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1910   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1911   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1912 
1913   ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr);
1914 
1915   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1916 
1917   /* determine the matrix preallocation information */
1918   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1919   for (i=xs; i<xs+nx; i++) {
1920     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1921     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1922     for (j=ys; j<ys+ny; j++) {
1923       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1924       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1925       slot   = i - gxs + gnx*(j - gys);
1926 
1927       /* Find block columns in block row */
1928       cnt = 0;
1929       for (ii=istart; ii<iend+1; ii++) {
1930         for (jj=jstart; jj<jend+1; jj++) {
1931           if (st == DMDA_STENCIL_BOX || !ii || !jj) {
1932             cols[cnt++] = slot + ii + gnx*jj;
1933           }
1934         }
1935       }
1936       ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
1937       ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
1938     }
1939   }
1940   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1941   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1942   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1943 
1944   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1945 
1946   /*
1947     For each node in the grid: we get the neighbors in the local (on processor ordering
1948     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1949     PETSc ordering.
1950   */
1951   if (!da->prealloc_only) {
1952     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
1953     for (i=xs; i<xs+nx; i++) {
1954       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1955       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1956       for (j=ys; j<ys+ny; j++) {
1957         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1958         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1959         slot   = i - gxs + gnx*(j - gys);
1960 
1961         /* Find block columns in block row */
1962         cnt = 0;
1963         for (ii=istart; ii<iend+1; ii++) {
1964           for (jj=jstart; jj<jend+1; jj++) {
1965             if (st == DMDA_STENCIL_BOX || !ii || !jj) {
1966               cols[cnt++] = slot + ii + gnx*jj;
1967             }
1968           }
1969         }
1970         ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
1971         ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1972       }
1973     }
1974     ierr = PetscFree(values);CHKERRQ(ierr);
1975     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1976     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1977     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1978   }
1979   ierr = PetscFree(cols);CHKERRQ(ierr);
1980   PetscFunctionReturn(0);
1981 }
1982 
1983 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J)
1984 {
1985   PetscErrorCode         ierr;
1986   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1987   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1988   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1989   MPI_Comm               comm;
1990   PetscScalar            *values;
1991   DMBoundaryType         bx,by,bz;
1992   DMDAStencilType        st;
1993   ISLocalToGlobalMapping ltog;
1994 
1995   PetscFunctionBegin;
1996   /*
1997      nc - number of components per grid point
1998      col - number of colors needed in one direction for single component problem
1999   */
2000   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
2001   col  = 2*s + 1;
2002 
2003   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
2004   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
2005   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
2006 
2007   /* create the matrix */
2008   ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr);
2009 
2010   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
2011 
2012   /* determine the matrix preallocation information */
2013   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
2014   for (i=xs; i<xs+nx; i++) {
2015     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2016     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2017     for (j=ys; j<ys+ny; j++) {
2018       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2019       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2020       for (k=zs; k<zs+nz; k++) {
2021         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2022         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2023 
2024         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2025 
2026         /* Find block columns in block row */
2027         cnt = 0;
2028         for (ii=istart; ii<iend+1; ii++) {
2029           for (jj=jstart; jj<jend+1; jj++) {
2030             for (kk=kstart; kk<kend+1; kk++) {
2031               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
2032                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
2033               }
2034             }
2035           }
2036         }
2037         ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2038         ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
2039       }
2040     }
2041   }
2042   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
2043   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
2044   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2045 
2046   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
2047 
2048   /*
2049     For each node in the grid: we get the neighbors in the local (on processor ordering
2050     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2051     PETSc ordering.
2052   */
2053   if (!da->prealloc_only) {
2054     ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr);
2055     for (i=xs; i<xs+nx; i++) {
2056       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2057       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2058       for (j=ys; j<ys+ny; j++) {
2059         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2060         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2061         for (k=zs; k<zs+nz; k++) {
2062           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2063           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2064 
2065           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2066 
2067           cnt = 0;
2068           for (ii=istart; ii<iend+1; ii++) {
2069             for (jj=jstart; jj<jend+1; jj++) {
2070               for (kk=kstart; kk<kend+1; kk++) {
2071                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
2072                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
2073                 }
2074               }
2075             }
2076           }
2077           ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2078           ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2079         }
2080       }
2081     }
2082     ierr = PetscFree(values);CHKERRQ(ierr);
2083     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2084     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2085     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2086   }
2087   ierr = PetscFree(cols);CHKERRQ(ierr);
2088   PetscFunctionReturn(0);
2089 }
2090 
2091 /* ---------------------------------------------------------------------------------*/
2092 
2093 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J)
2094 {
2095   PetscErrorCode         ierr;
2096   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2097   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt, maxcnt = 0,l,p,*dnz,*onz;
2098   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
2099   DM_DA                  *dd = (DM_DA*)da->data;
2100   PetscInt               ifill_col,*dfill = dd->dfill,*ofill = dd->ofill;
2101   MPI_Comm               comm;
2102   PetscScalar            *values;
2103   DMBoundaryType         bx,by,bz;
2104   ISLocalToGlobalMapping ltog;
2105   DMDAStencilType        st;
2106   PetscBool              removedups = PETSC_FALSE;
2107 
2108   PetscFunctionBegin;
2109   /*
2110          nc - number of components per grid point
2111          col - number of colors needed in one direction for single component problem
2112 
2113   */
2114   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
2115   col  = 2*s + 1;
2116   if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
2117                  by 2*stencil_width + 1\n");
2118   if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
2119                  by 2*stencil_width + 1\n");
2120   if (bz == DM_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
2121                  by 2*stencil_width + 1\n");
2122 
2123   /*
2124        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
2125        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
2126   */
2127   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
2128   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
2129   if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;
2130 
2131   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
2132   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
2133   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
2134 
2135   ierr = PetscMalloc1(col*col*col*nc,&cols);CHKERRQ(ierr);
2136   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
2137 
2138   /* determine the matrix preallocation information */
2139   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
2140 
2141   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
2142   for (i=xs; i<xs+nx; i++) {
2143     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2144     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2145     for (j=ys; j<ys+ny; j++) {
2146       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2147       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2148       for (k=zs; k<zs+nz; k++) {
2149         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2150         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2151 
2152         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2153 
2154         for (l=0; l<nc; l++) {
2155           cnt = 0;
2156           for (ii=istart; ii<iend+1; ii++) {
2157             for (jj=jstart; jj<jend+1; jj++) {
2158               for (kk=kstart; kk<kend+1; kk++) {
2159                 if (ii || jj || kk) {
2160                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2161                     for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2162                   }
2163                 } else {
2164                   if (dfill) {
2165                     for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2166                   } else {
2167                     for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2168                   }
2169                 }
2170               }
2171             }
2172           }
2173           row  = l + nc*(slot);
2174           maxcnt = PetscMax(maxcnt,cnt);
2175           if (removedups) {
2176             ierr = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
2177           } else {
2178             ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
2179           }
2180         }
2181       }
2182     }
2183   }
2184   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
2185   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
2186   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2187   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
2188 
2189   /*
2190     For each node in the grid: we get the neighbors in the local (on processor ordering
2191     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2192     PETSc ordering.
2193   */
2194   if (!da->prealloc_only) {
2195     ierr = PetscCalloc1(maxcnt,&values);CHKERRQ(ierr);
2196     for (i=xs; i<xs+nx; i++) {
2197       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2198       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2199       for (j=ys; j<ys+ny; j++) {
2200         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2201         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2202         for (k=zs; k<zs+nz; k++) {
2203           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2204           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2205 
2206           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2207 
2208           for (l=0; l<nc; l++) {
2209             cnt = 0;
2210             for (ii=istart; ii<iend+1; ii++) {
2211               for (jj=jstart; jj<jend+1; jj++) {
2212                 for (kk=kstart; kk<kend+1; kk++) {
2213                   if (ii || jj || kk) {
2214                     if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2215                       for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2216                     }
2217                   } else {
2218                     if (dfill) {
2219                       for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2220                     } else {
2221                       for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2222                     }
2223                   }
2224                 }
2225               }
2226             }
2227             row  = l + nc*(slot);
2228             ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2229           }
2230         }
2231       }
2232     }
2233     ierr = PetscFree(values);CHKERRQ(ierr);
2234     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2235     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2236     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2237   }
2238   ierr = PetscFree(cols);CHKERRQ(ierr);
2239   PetscFunctionReturn(0);
2240 }
2241