xref: /petsc/src/dm/impls/da/fdda.c (revision 732e2eb99afef1b984167973f126bbed28c96039)
1 
2 #include <private/daimpl.h> /*I      "petscdmda.h"     I*/
3 #include <petscmat.h>         /*I      "petscmat.h"    I*/
4 #include <private/matimpl.h>
5 
6 extern PetscErrorCode DMGetColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring *);
7 extern PetscErrorCode DMGetColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring *);
8 extern PetscErrorCode DMGetColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring *);
9 extern PetscErrorCode DMGetColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring *);
10 
11 /*
12    For ghost i that may be negative or greater than the upper bound this
13   maps it into the 0:m-1 range using periodicity
14 */
15 #define SetInRange(i,m) ((i < 0) ? m+i:((i >= m) ? i-m:i))
16 
17 #undef __FUNCT__
18 #define __FUNCT__ "DMDASetBlockFills_Private"
19 static PetscErrorCode DMDASetBlockFills_Private(PetscInt *dfill,PetscInt w,PetscInt **rfill)
20 {
21   PetscErrorCode ierr;
22   PetscInt       i,j,nz,*fill;
23 
24   PetscFunctionBegin;
25   if (!dfill) PetscFunctionReturn(0);
26 
27   /* count number nonzeros */
28   nz = 0;
29   for (i=0; i<w; i++) {
30     for (j=0; j<w; j++) {
31       if (dfill[w*i+j]) nz++;
32     }
33   }
34   ierr = PetscMalloc((nz + w + 1)*sizeof(PetscInt),&fill);CHKERRQ(ierr);
35   /* construct modified CSR storage of nonzero structure */
36   nz = w + 1;
37   for (i=0; i<w; i++) {
38     fill[i] = nz;
39     for (j=0; j<w; j++) {
40       if (dfill[w*i+j]) {
41 	fill[nz] = j;
42 	nz++;
43       }
44     }
45   }
46   fill[w] = nz;
47 
48   *rfill = fill;
49   PetscFunctionReturn(0);
50 }
51 
52 #undef __FUNCT__
53 #define __FUNCT__ "DMDASetBlockFills"
54 /*@
55     DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
56     of the matrix returned by DMGetMatrix().
57 
58     Logically Collective on DMDA
59 
60     Input Parameter:
61 +   da - the distributed array
62 .   dfill - the fill pattern in the diagonal block (may be PETSC_NULL, means use dense block)
63 -   ofill - the fill pattern in the off-diagonal blocks
64 
65 
66     Level: developer
67 
68     Notes: This only makes sense when you are doing multicomponent problems but using the
69        MPIAIJ matrix format
70 
71            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
72        representing coupling and 0 entries for missing coupling. For example
73 $             dfill[9] = {1, 0, 0,
74 $                         1, 1, 0,
75 $                         0, 1, 1}
76        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
77        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
78        diagonal block).
79 
80      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
81      can be represented in the dfill, ofill format
82 
83    Contributed by Glenn Hammond
84 
85 .seealso DMGetMatrix(), DMDASetGetMatrix(), DMDASetMatPreallocateOnly()
86 
87 @*/
88 PetscErrorCode  DMDASetBlockFills(DM da,PetscInt *dfill,PetscInt *ofill)
89 {
90   DM_DA          *dd = (DM_DA*)da->data;
91   PetscErrorCode ierr;
92 
93   PetscFunctionBegin;
94   ierr = DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);CHKERRQ(ierr);
95   ierr = DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);CHKERRQ(ierr);
96   PetscFunctionReturn(0);
97 }
98 
99 
100 #undef __FUNCT__
101 #define __FUNCT__ "DMGetColoring_DA"
102 PetscErrorCode  DMGetColoring_DA(DM da,ISColoringType ctype,const MatType mtype,ISColoring *coloring)
103 {
104   PetscErrorCode   ierr;
105   PetscInt         dim,m,n,p,nc;
106   DMDABoundaryType bx,by,bz;
107   MPI_Comm         comm;
108   PetscMPIInt      size;
109   PetscBool        isBAIJ;
110   DM_DA            *dd = (DM_DA*)da->data;
111 
112   PetscFunctionBegin;
113   /*
114                                   m
115           ------------------------------------------------------
116          |                                                     |
117          |                                                     |
118          |               ----------------------                |
119          |               |                    |                |
120       n  |           yn  |                    |                |
121          |               |                    |                |
122          |               .---------------------                |
123          |             (xs,ys)     xn                          |
124          |            .                                        |
125          |         (gxs,gys)                                   |
126          |                                                     |
127           -----------------------------------------------------
128   */
129 
130   /*
131          nc - number of components per grid point
132          col - number of colors needed in one direction for single component problem
133 
134   */
135   ierr = DMDAGetInfo(da,&dim,0,0,0,&m,&n,&p,&nc,0,&bx,&by,&bz,0);CHKERRQ(ierr);
136 
137   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
138   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
139   if (ctype == IS_COLORING_GHOSTED){
140     if (size == 1) {
141       ctype = IS_COLORING_GLOBAL;
142     } else if (dim > 1){
143       if ((m==1 && bx == DMDA_BOUNDARY_PERIODIC) || (n==1 && by == DMDA_BOUNDARY_PERIODIC) || (p==1 && bz == DMDA_BOUNDARY_PERIODIC)){
144         SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"IS_COLORING_GHOSTED cannot be used for periodic boundary condition having both ends of the domain  on the same process");
145       }
146     }
147   }
148 
149   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
150      matrices is for the blocks, not the individual matrix elements  */
151   ierr = PetscStrcmp(mtype,MATBAIJ,&isBAIJ);CHKERRQ(ierr);
152   if (!isBAIJ) {ierr = PetscStrcmp(mtype,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);}
153   if (!isBAIJ) {ierr = PetscStrcmp(mtype,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);}
154   if (isBAIJ) {
155     dd->w = 1;
156     dd->xs = dd->xs/nc;
157     dd->xe = dd->xe/nc;
158     dd->Xs = dd->Xs/nc;
159     dd->Xe = dd->Xe/nc;
160   }
161 
162   /*
163      We do not provide a getcoloring function in the DMDA operations because
164    the basic DMDA does not know about matrices. We think of DMDA as being more
165    more low-level then matrices.
166   */
167   if (dim == 1) {
168     ierr = DMGetColoring_DA_1d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
169   } else if (dim == 2) {
170     ierr =  DMGetColoring_DA_2d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
171   } else if (dim == 3) {
172     ierr =  DMGetColoring_DA_3d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
173   } else {
174     SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
175   }
176   if (isBAIJ) {
177     dd->w = nc;
178     dd->xs = dd->xs*nc;
179     dd->xe = dd->xe*nc;
180     dd->Xs = dd->Xs*nc;
181     dd->Xe = dd->Xe*nc;
182   }
183   PetscFunctionReturn(0);
184 }
185 
186 /* ---------------------------------------------------------------------------------*/
187 
188 #undef __FUNCT__
189 #define __FUNCT__ "DMGetColoring_DA_2d_MPIAIJ"
190 PetscErrorCode DMGetColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
191 {
192   PetscErrorCode         ierr;
193   PetscInt               xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
194   PetscInt               ncolors;
195   MPI_Comm               comm;
196   DMDABoundaryType       bx,by;
197   DMDAStencilType        st;
198   ISColoringValue        *colors;
199   DM_DA                  *dd = (DM_DA*)da->data;
200 
201   PetscFunctionBegin;
202   /*
203          nc - number of components per grid point
204          col - number of colors needed in one direction for single component problem
205 
206   */
207   ierr = DMDAGetInfo(da,&dim,&m,&n,0,&M,&N,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
208   col    = 2*s + 1;
209   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
210   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
211   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
212 
213   /* special case as taught to us by Paul Hovland */
214   if (st == DMDA_STENCIL_STAR && s == 1) {
215     ierr = DMGetColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
216   } else {
217 
218     if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)){
219       SETERRQ2(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X (%d) is divisible\n\
220                  by 2*stencil_width + 1 (%d)\n", m, col);
221     }
222     if (by == DMDA_BOUNDARY_PERIODIC && (n % col)){
223       SETERRQ2(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y (%d) is divisible\n\
224                  by 2*stencil_width + 1 (%d)\n", n, col);
225     }
226     if (ctype == IS_COLORING_GLOBAL) {
227       if (!dd->localcoloring) {
228 	ierr = PetscMalloc(nc*nx*ny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
229 	ii = 0;
230 	for (j=ys; j<ys+ny; j++) {
231 	  for (i=xs; i<xs+nx; i++) {
232 	    for (k=0; k<nc; k++) {
233 	      colors[ii++] = k + nc*((i % col) + col*(j % col));
234 	    }
235 	  }
236 	}
237         ncolors = nc + nc*(col-1 + col*(col-1));
238 	ierr = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,&dd->localcoloring);CHKERRQ(ierr);
239       }
240       *coloring = dd->localcoloring;
241     } else if (ctype == IS_COLORING_GHOSTED) {
242       if (!dd->ghostedcoloring) {
243 	ierr = PetscMalloc(nc*gnx*gny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
244 	ii = 0;
245 	for (j=gys; j<gys+gny; j++) {
246 	  for (i=gxs; i<gxs+gnx; i++) {
247 	    for (k=0; k<nc; k++) {
248 	      /* the complicated stuff is to handle periodic boundaries */
249 	      colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
250 	    }
251 	  }
252 	}
253         ncolors = nc + nc*(col - 1 + col*(col-1));
254 	ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
255         /* PetscIntView(ncolors,(PetscInt *)colors,0); */
256 
257 	ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
258       }
259       *coloring = dd->ghostedcoloring;
260     } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
261   }
262   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
263   PetscFunctionReturn(0);
264 }
265 
266 /* ---------------------------------------------------------------------------------*/
267 
268 #undef __FUNCT__
269 #define __FUNCT__ "DMGetColoring_DA_3d_MPIAIJ"
270 PetscErrorCode DMGetColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
271 {
272   PetscErrorCode    ierr;
273   PetscInt          xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
274   PetscInt          ncolors;
275   MPI_Comm          comm;
276   DMDABoundaryType  bx,by,bz;
277   DMDAStencilType   st;
278   ISColoringValue   *colors;
279   DM_DA             *dd = (DM_DA*)da->data;
280 
281   PetscFunctionBegin;
282   /*
283          nc - number of components per grid point
284          col - number of colors needed in one direction for single component problem
285 
286   */
287   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
288   col    = 2*s + 1;
289   if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)){
290     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
291                  by 2*stencil_width + 1\n");
292   }
293   if (by == DMDA_BOUNDARY_PERIODIC && (n % col)){
294     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
295                  by 2*stencil_width + 1\n");
296   }
297   if (bz == DMDA_BOUNDARY_PERIODIC && (p % col)){
298     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
299                  by 2*stencil_width + 1\n");
300   }
301 
302   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
303   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
304   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
305 
306   /* create the coloring */
307   if (ctype == IS_COLORING_GLOBAL) {
308     if (!dd->localcoloring) {
309       ierr = PetscMalloc(nc*nx*ny*nz*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
310       ii = 0;
311       for (k=zs; k<zs+nz; k++) {
312         for (j=ys; j<ys+ny; j++) {
313           for (i=xs; i<xs+nx; i++) {
314             for (l=0; l<nc; l++) {
315               colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
316             }
317           }
318         }
319       }
320       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
321       ierr = ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,&dd->localcoloring);CHKERRQ(ierr);
322     }
323     *coloring = dd->localcoloring;
324   } else if (ctype == IS_COLORING_GHOSTED) {
325     if (!dd->ghostedcoloring) {
326       ierr = PetscMalloc(nc*gnx*gny*gnz*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
327       ii = 0;
328       for (k=gzs; k<gzs+gnz; k++) {
329         for (j=gys; j<gys+gny; j++) {
330           for (i=gxs; i<gxs+gnx; i++) {
331             for (l=0; l<nc; l++) {
332               /* the complicated stuff is to handle periodic boundaries */
333               colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
334             }
335           }
336         }
337       }
338       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
339       ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
340       ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
341     }
342     *coloring = dd->ghostedcoloring;
343   } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
344   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
345   PetscFunctionReturn(0);
346 }
347 
348 /* ---------------------------------------------------------------------------------*/
349 
350 #undef __FUNCT__
351 #define __FUNCT__ "DMGetColoring_DA_1d_MPIAIJ"
352 PetscErrorCode DMGetColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
353 {
354   PetscErrorCode    ierr;
355   PetscInt          xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
356   PetscInt          ncolors;
357   MPI_Comm          comm;
358   DMDABoundaryType  bx;
359   ISColoringValue   *colors;
360   DM_DA             *dd = (DM_DA*)da->data;
361 
362   PetscFunctionBegin;
363   /*
364          nc - number of components per grid point
365          col - number of colors needed in one direction for single component problem
366 
367   */
368   ierr = DMDAGetInfo(da,&dim,&m,0,0,&M,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
369   col    = 2*s + 1;
370 
371   if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)) {
372     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points is divisible\n\
373                  by 2*stencil_width + 1\n");
374   }
375 
376   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
377   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
378   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
379 
380   /* create the coloring */
381   if (ctype == IS_COLORING_GLOBAL) {
382     if (!dd->localcoloring) {
383       ierr = PetscMalloc(nc*nx*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
384       i1 = 0;
385       for (i=xs; i<xs+nx; i++) {
386         for (l=0; l<nc; l++) {
387           colors[i1++] = l + nc*(i % col);
388         }
389       }
390       ncolors = nc + nc*(col-1);
391       ierr = ISColoringCreate(comm,ncolors,nc*nx,colors,&dd->localcoloring);CHKERRQ(ierr);
392     }
393     *coloring = dd->localcoloring;
394   } else if (ctype == IS_COLORING_GHOSTED) {
395     if (!dd->ghostedcoloring) {
396       ierr = PetscMalloc(nc*gnx*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
397       i1 = 0;
398       for (i=gxs; i<gxs+gnx; i++) {
399         for (l=0; l<nc; l++) {
400           /* the complicated stuff is to handle periodic boundaries */
401           colors[i1++] = l + nc*(SetInRange(i,m) % col);
402         }
403       }
404       ncolors = nc + nc*(col-1);
405       ierr = ISColoringCreate(comm,ncolors,nc*gnx,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
406       ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
407     }
408     *coloring = dd->ghostedcoloring;
409   } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
410   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
411   PetscFunctionReturn(0);
412 }
413 
414 #undef __FUNCT__
415 #define __FUNCT__ "DMGetColoring_DA_2d_5pt_MPIAIJ"
416 PetscErrorCode DMGetColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
417 {
418   PetscErrorCode    ierr;
419   PetscInt          xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
420   PetscInt          ncolors;
421   MPI_Comm          comm;
422   DMDABoundaryType  bx,by;
423   ISColoringValue   *colors;
424   DM_DA             *dd = (DM_DA*)da->data;
425 
426   PetscFunctionBegin;
427   /*
428          nc - number of components per grid point
429          col - number of colors needed in one direction for single component problem
430 
431   */
432   ierr   = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,0);CHKERRQ(ierr);
433   ierr   = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
434   ierr   = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
435   ierr   = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
436 
437   if (bx == DMDA_BOUNDARY_PERIODIC && (m % 5)){
438     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
439                  by 5\n");
440   }
441   if (by == DMDA_BOUNDARY_PERIODIC && (n % 5)){
442     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
443                  by 5\n");
444   }
445 
446   /* create the coloring */
447   if (ctype == IS_COLORING_GLOBAL) {
448     if (!dd->localcoloring) {
449       ierr = PetscMalloc(nc*nx*ny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
450       ii = 0;
451       for (j=ys; j<ys+ny; j++) {
452 	for (i=xs; i<xs+nx; i++) {
453 	  for (k=0; k<nc; k++) {
454 	    colors[ii++] = k + nc*((3*j+i) % 5);
455 	  }
456 	}
457       }
458       ncolors = 5*nc;
459       ierr = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,&dd->localcoloring);CHKERRQ(ierr);
460     }
461     *coloring = dd->localcoloring;
462   } else if (ctype == IS_COLORING_GHOSTED) {
463     if (!dd->ghostedcoloring) {
464       ierr = PetscMalloc(nc*gnx*gny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
465       ii = 0;
466       for (j=gys; j<gys+gny; j++) {
467 	for (i=gxs; i<gxs+gnx; i++) {
468 	  for (k=0; k<nc; k++) {
469 	    colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
470 	  }
471 	}
472       }
473       ncolors = 5*nc;
474       ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
475       ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
476     }
477     *coloring = dd->ghostedcoloring;
478   } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
479   PetscFunctionReturn(0);
480 }
481 
482 /* =========================================================================== */
483 extern PetscErrorCode DMGetMatrix_DA_1d_MPIAIJ(DM,Mat);
484 extern PetscErrorCode DMGetMatrix_DA_2d_MPIAIJ(DM,Mat);
485 extern PetscErrorCode DMGetMatrix_DA_2d_MPIAIJ_Fill(DM,Mat);
486 extern PetscErrorCode DMGetMatrix_DA_3d_MPIAIJ(DM,Mat);
487 extern PetscErrorCode DMGetMatrix_DA_3d_MPIAIJ_Fill(DM,Mat);
488 extern PetscErrorCode DMGetMatrix_DA_2d_MPIBAIJ(DM,Mat);
489 extern PetscErrorCode DMGetMatrix_DA_3d_MPIBAIJ(DM,Mat);
490 extern PetscErrorCode DMGetMatrix_DA_2d_MPISBAIJ(DM,Mat);
491 extern PetscErrorCode DMGetMatrix_DA_3d_MPISBAIJ(DM,Mat);
492 
493 #undef __FUNCT__
494 #define __FUNCT__ "MatSetDM"
495 /*@
496    MatSetDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix
497 
498    Logically Collective on Mat
499 
500    Input Parameters:
501 +  mat - the matrix
502 -  da - the da
503 
504    Level: intermediate
505 
506 @*/
507 PetscErrorCode  MatSetDM(Mat mat,DM da)
508 {
509   PetscErrorCode ierr;
510 
511   PetscFunctionBegin;
512   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
513   PetscValidHeaderSpecific(da,DM_CLASSID,1);
514   ierr = PetscTryMethod(mat,"MatSetDM_C",(Mat,DM),(mat,da));CHKERRQ(ierr);
515   PetscFunctionReturn(0);
516 }
517 
518 EXTERN_C_BEGIN
519 #undef __FUNCT__
520 #define __FUNCT__ "MatView_MPI_DA"
521 PetscErrorCode  MatView_MPI_DA(Mat A,PetscViewer viewer)
522 {
523   DM             da;
524   PetscErrorCode ierr;
525   const char     *prefix;
526   Mat            Anatural;
527   AO             ao;
528   PetscInt       rstart,rend,*petsc,i;
529   IS             is;
530   MPI_Comm       comm;
531 
532   PetscFunctionBegin;
533   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
534   ierr = PetscObjectQuery((PetscObject)A,"DMDA",(PetscObject*)&da);CHKERRQ(ierr);
535   if (!da) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
536 
537   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
538   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
539   ierr = PetscMalloc((rend-rstart)*sizeof(PetscInt),&petsc);CHKERRQ(ierr);
540   for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
541   ierr = AOApplicationToPetsc(ao,rend-rstart,petsc);CHKERRQ(ierr);
542   ierr = ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
543 
544   /* call viewer on natural ordering */
545   ierr = MatGetSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);CHKERRQ(ierr);
546   ierr = ISDestroy(is);CHKERRQ(ierr);
547   ierr = PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);CHKERRQ(ierr);
548   ierr = PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);CHKERRQ(ierr);
549   ierr = PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);CHKERRQ(ierr);
550   ierr = MatView(Anatural,viewer);CHKERRQ(ierr);
551   ierr = MatDestroy(Anatural);CHKERRQ(ierr);
552   PetscFunctionReturn(0);
553 }
554 EXTERN_C_END
555 
556 EXTERN_C_BEGIN
557 #undef __FUNCT__
558 #define __FUNCT__ "MatLoad_MPI_DA"
559 PetscErrorCode  MatLoad_MPI_DA(Mat A,PetscViewer viewer)
560 {
561   DM             da;
562   PetscErrorCode ierr;
563   Mat            Anatural,Aapp;
564   AO             ao;
565   PetscInt       rstart,rend,*app,i;
566   IS             is;
567   MPI_Comm       comm;
568 
569   PetscFunctionBegin;
570   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
571   ierr = PetscObjectQuery((PetscObject)A,"DMDA",(PetscObject*)&da);CHKERRQ(ierr);
572   if (!da) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
573 
574   /* Load the matrix in natural ordering */
575   ierr = MatCreate(((PetscObject)A)->comm,&Anatural);CHKERRQ(ierr);
576   ierr = MatSetType(Anatural,((PetscObject)A)->type_name);CHKERRQ(ierr);
577   ierr = MatSetSizes(Anatural,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
578   ierr = MatLoad(Anatural,viewer);CHKERRQ(ierr);
579 
580   /* Map natural ordering to application ordering and create IS */
581   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
582   ierr = MatGetOwnershipRange(Anatural,&rstart,&rend);CHKERRQ(ierr);
583   ierr = PetscMalloc((rend-rstart)*sizeof(PetscInt),&app);CHKERRQ(ierr);
584   for (i=rstart; i<rend; i++) app[i-rstart] = i;
585   ierr = AOPetscToApplication(ao,rend-rstart,app);CHKERRQ(ierr);
586   ierr = ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
587 
588   /* Do permutation and replace header */
589   ierr = MatGetSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);CHKERRQ(ierr);
590   ierr = MatHeaderReplace(A,Aapp);CHKERRQ(ierr);
591   ierr = ISDestroy(is);CHKERRQ(ierr);
592   ierr = MatDestroy(Anatural);CHKERRQ(ierr);
593   PetscFunctionReturn(0);
594 }
595 EXTERN_C_END
596 
597 #undef __FUNCT__
598 #define __FUNCT__ "DMGetMatrix_DA"
599 PetscErrorCode  DMGetMatrix_DA(DM da, const MatType mtype,Mat *J)
600 {
601   PetscErrorCode ierr;
602   PetscInt       dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
603   Mat            A;
604   MPI_Comm       comm;
605   const MatType  Atype;
606   void           (*aij)(void)=PETSC_NULL,(*baij)(void)=PETSC_NULL,(*sbaij)(void)=PETSC_NULL;
607   MatType        ttype[256];
608   PetscBool      flg;
609   PetscMPIInt    size;
610   DM_DA          *dd = (DM_DA*)da->data;
611 
612   PetscFunctionBegin;
613 #ifndef PETSC_USE_DYNAMIC_LIBRARIES
614   ierr = MatInitializePackage(PETSC_NULL);CHKERRQ(ierr);
615 #endif
616   if (!mtype) mtype = MATAIJ;
617   ierr = PetscStrcpy((char*)ttype,mtype);CHKERRQ(ierr);
618   ierr = PetscOptionsBegin(((PetscObject)da)->comm,((PetscObject)da)->prefix,"DMDA options","Mat");CHKERRQ(ierr);
619   ierr = PetscOptionsList("-da_mat_type","Matrix type","MatSetType",MatList,mtype,(char*)ttype,256,&flg);CHKERRQ(ierr);
620   ierr = PetscOptionsEnd();
621 
622   /*
623                                   m
624           ------------------------------------------------------
625          |                                                     |
626          |                                                     |
627          |               ----------------------                |
628          |               |                    |                |
629       n  |           ny  |                    |                |
630          |               |                    |                |
631          |               .---------------------                |
632          |             (xs,ys)     nx                          |
633          |            .                                        |
634          |         (gxs,gys)                                   |
635          |                                                     |
636           -----------------------------------------------------
637   */
638 
639   /*
640          nc - number of components per grid point
641          col - number of colors needed in one direction for single component problem
642 
643   */
644   ierr = DMDAGetInfo(da,&dim,&M,&N,&P,0,0,0,&dof,0,0,0,0,0);CHKERRQ(ierr);
645   ierr = DMDAGetCorners(da,0,0,0,&nx,&ny,&nz);CHKERRQ(ierr);
646   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
647   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
648   ierr = MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);CHKERRQ(ierr);
649   ierr = MatSetType(A,(const MatType)ttype);CHKERRQ(ierr);
650   ierr = MatSetDM(A,da);CHKERRQ(ierr);
651   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
652   ierr = MatGetType(A,&Atype);CHKERRQ(ierr);
653   /*
654      We do not provide a getmatrix function in the DMDA operations because
655    the basic DMDA does not know about matrices. We think of DMDA as being more
656    more low-level than matrices. This is kind of cheating but, cause sometimes
657    we think of DMDA has higher level than matrices.
658 
659      We could switch based on Atype (or mtype), but we do not since the
660    specialized setting routines depend only the particular preallocation
661    details of the matrix, not the type itself.
662   */
663   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
664   if (!aij) {
665     ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
666   }
667   if (!aij) {
668     ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
669     if (!baij) {
670       ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
671     }
672     if (!baij){
673       ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
674       if (!sbaij) {
675         ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
676       }
677     }
678   }
679   if (aij) {
680     if (dim == 1) {
681       ierr = DMGetMatrix_DA_1d_MPIAIJ(da,A);CHKERRQ(ierr);
682     } else if (dim == 2) {
683       if (dd->ofill) {
684         ierr = DMGetMatrix_DA_2d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
685       } else {
686         ierr = DMGetMatrix_DA_2d_MPIAIJ(da,A);CHKERRQ(ierr);
687       }
688     } else if (dim == 3) {
689       if (dd->ofill) {
690         ierr = DMGetMatrix_DA_3d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
691       } else {
692         ierr = DMGetMatrix_DA_3d_MPIAIJ(da,A);CHKERRQ(ierr);
693       }
694     }
695   } else if (baij) {
696     if (dim == 2) {
697       ierr = DMGetMatrix_DA_2d_MPIBAIJ(da,A);CHKERRQ(ierr);
698     } else if (dim == 3) {
699       ierr = DMGetMatrix_DA_3d_MPIBAIJ(da,A);CHKERRQ(ierr);
700     } else {
701       SETERRQ3(((PetscObject)da)->comm,PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension!\n" \
702 	       "Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
703     }
704   } else if (sbaij) {
705     if (dim == 2) {
706       ierr = DMGetMatrix_DA_2d_MPISBAIJ(da,A);CHKERRQ(ierr);
707     } else if (dim == 3) {
708       ierr = DMGetMatrix_DA_3d_MPISBAIJ(da,A);CHKERRQ(ierr);
709     } else {
710       SETERRQ3(((PetscObject)da)->comm,PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension!\n" \
711 	       "Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
712     }
713   }
714   ierr = DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);CHKERRQ(ierr);
715   ierr = MatSetStencil(A,dim,dims,starts,dof);CHKERRQ(ierr);
716   ierr = PetscObjectCompose((PetscObject)A,"DMDA",(PetscObject)da);CHKERRQ(ierr);
717   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
718   if (size > 1) {
719     /* change viewer to display matrix in natural ordering */
720     ierr = MatShellSetOperation(A, MATOP_VIEW, (void (*)(void)) MatView_MPI_DA);CHKERRQ(ierr);
721     ierr = MatShellSetOperation(A, MATOP_LOAD, (void (*)(void)) MatLoad_MPI_DA);CHKERRQ(ierr);
722   }
723   *J = A;
724   PetscFunctionReturn(0);
725 }
726 
727 /* ---------------------------------------------------------------------------------*/
728 #undef __FUNCT__
729 #define __FUNCT__ "DMGetMatrix_DA_2d_MPIAIJ"
730 PetscErrorCode DMGetMatrix_DA_2d_MPIAIJ(DM da,Mat J)
731 {
732   PetscErrorCode         ierr;
733   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = PETSC_NULL,k,nc,*rows = PETSC_NULL,col,cnt,l,p;
734   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
735   MPI_Comm               comm;
736   PetscScalar            *values;
737   DMDABoundaryType       bx,by;
738   ISLocalToGlobalMapping ltog,ltogb;
739   DMDAStencilType        st;
740   DM_DA                  *dd = (DM_DA*)da->data;
741 
742   PetscFunctionBegin;
743   /*
744          nc - number of components per grid point
745          col - number of colors needed in one direction for single component problem
746 
747   */
748   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
749   col = 2*s + 1;
750   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
751   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
752   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
753 
754   ierr = PetscMalloc2(nc,PetscInt,&rows,col*col*nc*nc,PetscInt,&cols);CHKERRQ(ierr);
755   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
756   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
757 
758   /* determine the matrix preallocation information */
759   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
760   for (i=xs; i<xs+nx; i++) {
761 
762     pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
763     pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
764 
765     for (j=ys; j<ys+ny; j++) {
766       slot = i - gxs + gnx*(j - gys);
767 
768       lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
769       lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
770 
771       cnt  = 0;
772       for (k=0; k<nc; k++) {
773 	for (l=lstart; l<lend+1; l++) {
774 	  for (p=pstart; p<pend+1; p++) {
775 	    if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
776 	      cols[cnt++]  = k + nc*(slot + gnx*l + p);
777 	    }
778 	  }
779 	}
780 	rows[k] = k + nc*(slot);
781       }
782       ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
783     }
784   }
785   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
786   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
787   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
788   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
789 
790   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
791   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
792 
793   /*
794     For each node in the grid: we get the neighbors in the local (on processor ordering
795     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
796     PETSc ordering.
797   */
798   if (!da->prealloc_only) {
799     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
800     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
801     for (i=xs; i<xs+nx; i++) {
802 
803       pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
804       pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
805 
806       for (j=ys; j<ys+ny; j++) {
807 	slot = i - gxs + gnx*(j - gys);
808 
809 	lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
810 	lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
811 
812 	cnt  = 0;
813 	for (k=0; k<nc; k++) {
814 	  for (l=lstart; l<lend+1; l++) {
815 	    for (p=pstart; p<pend+1; p++) {
816 	      if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
817 		cols[cnt++]  = k + nc*(slot + gnx*l + p);
818 	      }
819 	    }
820 	  }
821 	  rows[k]      = k + nc*(slot);
822 	}
823 	ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
824       }
825     }
826     ierr = PetscFree(values);CHKERRQ(ierr);
827     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
828     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
829   }
830   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
831   PetscFunctionReturn(0);
832 }
833 
834 #undef __FUNCT__
835 #define __FUNCT__ "DMGetMatrix_DA_2d_MPIAIJ_Fill"
836 PetscErrorCode DMGetMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)
837 {
838   PetscErrorCode         ierr;
839   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
840   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,l,p;
841   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
842   DM_DA                  *dd = (DM_DA*)da->data;
843   PetscInt               ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
844   MPI_Comm               comm;
845   PetscScalar            *values;
846   DMDABoundaryType       bx,by;
847   ISLocalToGlobalMapping ltog,ltogb;
848   DMDAStencilType        st;
849 
850   PetscFunctionBegin;
851   /*
852          nc - number of components per grid point
853          col - number of colors needed in one direction for single component problem
854 
855   */
856   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
857   col = 2*s + 1;
858   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
859   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
860   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
861 
862   ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
863   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
864   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
865 
866   /* determine the matrix preallocation information */
867   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
868   for (i=xs; i<xs+nx; i++) {
869 
870     pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
871     pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
872 
873     for (j=ys; j<ys+ny; j++) {
874       slot = i - gxs + gnx*(j - gys);
875 
876       lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
877       lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
878 
879       for (k=0; k<nc; k++) {
880         cnt  = 0;
881 	for (l=lstart; l<lend+1; l++) {
882 	  for (p=pstart; p<pend+1; p++) {
883             if (l || p) {
884 	      if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
885                 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++)
886 		  cols[cnt++]  = ofill[ifill_col] + nc*(slot + gnx*l + p);
887 	      }
888             } else {
889 	      if (dfill) {
890 		for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++)
891 		  cols[cnt++]  = dfill[ifill_col] + nc*(slot + gnx*l + p);
892 	      } else {
893 		for (ifill_col=0; ifill_col<nc; ifill_col++)
894 		  cols[cnt++]  = ifill_col + nc*(slot + gnx*l + p);
895 	      }
896             }
897 	  }
898 	}
899 	row = k + nc*(slot);
900         ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
901       }
902     }
903   }
904   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
905   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
906   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
907   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
908   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
909 
910   /*
911     For each node in the grid: we get the neighbors in the local (on processor ordering
912     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
913     PETSc ordering.
914   */
915   if (!da->prealloc_only) {
916     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
917     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
918     for (i=xs; i<xs+nx; i++) {
919 
920       pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
921       pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
922 
923       for (j=ys; j<ys+ny; j++) {
924 	slot = i - gxs + gnx*(j - gys);
925 
926 	lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
927 	lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
928 
929 	for (k=0; k<nc; k++) {
930 	  cnt  = 0;
931 	  for (l=lstart; l<lend+1; l++) {
932 	    for (p=pstart; p<pend+1; p++) {
933 	      if (l || p) {
934 		if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
935 		  for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++)
936 		    cols[cnt++]  = ofill[ifill_col] + nc*(slot + gnx*l + p);
937 		}
938 	      } else {
939 		if (dfill) {
940 		  for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++)
941 		    cols[cnt++]  = dfill[ifill_col] + nc*(slot + gnx*l + p);
942 		} else {
943 		  for (ifill_col=0; ifill_col<nc; ifill_col++)
944 		    cols[cnt++]  = ifill_col + nc*(slot + gnx*l + p);
945 		}
946 	      }
947 	    }
948 	  }
949 	  row  = k + nc*(slot);
950 	  ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
951 	}
952       }
953     }
954     ierr = PetscFree(values);CHKERRQ(ierr);
955     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
956     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
957   }
958   ierr = PetscFree(cols);CHKERRQ(ierr);
959   PetscFunctionReturn(0);
960 }
961 
962 /* ---------------------------------------------------------------------------------*/
963 
964 #undef __FUNCT__
965 #define __FUNCT__ "DMGetMatrix_DA_3d_MPIAIJ"
966 PetscErrorCode DMGetMatrix_DA_3d_MPIAIJ(DM da,Mat J)
967 {
968   PetscErrorCode         ierr;
969   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
970   PetscInt               m,n,dim,s,*cols = PETSC_NULL,k,nc,*rows = PETSC_NULL,col,cnt,l,p,*dnz = PETSC_NULL,*onz = PETSC_NULL;
971   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
972   MPI_Comm               comm;
973   PetscScalar            *values;
974   DMDABoundaryType       bx,by,bz;
975   ISLocalToGlobalMapping ltog,ltogb;
976   DMDAStencilType        st;
977   DM_DA                  *dd = (DM_DA*)da->data;
978 
979   PetscFunctionBegin;
980   /*
981          nc - number of components per grid point
982          col - number of colors needed in one direction for single component problem
983 
984   */
985   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
986   col    = 2*s + 1;
987 
988   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
989   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
990   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
991 
992   ierr = PetscMalloc2(nc,PetscInt,&rows,col*col*col*nc*nc,PetscInt,&cols);CHKERRQ(ierr);
993   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
994   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
995 
996   /* determine the matrix preallocation information */
997   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
998   for (i=xs; i<xs+nx; i++) {
999     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1000     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1001     for (j=ys; j<ys+ny; j++) {
1002       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1003       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1004       for (k=zs; k<zs+nz; k++) {
1005 	kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1006 	kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1007 
1008 	slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1009 
1010 	cnt  = 0;
1011 	for (l=0; l<nc; l++) {
1012 	  for (ii=istart; ii<iend+1; ii++) {
1013 	    for (jj=jstart; jj<jend+1; jj++) {
1014 	      for (kk=kstart; kk<kend+1; kk++) {
1015 		if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1016 		  cols[cnt++]  = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1017 		}
1018 	      }
1019 	    }
1020 	  }
1021 	  rows[l] = l + nc*(slot);
1022 	}
1023 	ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1024       }
1025     }
1026   }
1027   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1028   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1029   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1030   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1031   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1032   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1033 
1034   /*
1035     For each node in the grid: we get the neighbors in the local (on processor ordering
1036     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1037     PETSc ordering.
1038   */
1039   if (!da->prealloc_only) {
1040     ierr = PetscMalloc(col*col*col*nc*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1041     ierr = PetscMemzero(values,col*col*col*nc*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1042     for (i=xs; i<xs+nx; i++) {
1043       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1044       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1045       for (j=ys; j<ys+ny; j++) {
1046 	jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1047 	jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1048 	for (k=zs; k<zs+nz; k++) {
1049 	  kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1050 	  kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1051 
1052 	  slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1053 
1054 	  cnt  = 0;
1055 	  for (l=0; l<nc; l++) {
1056 	    for (ii=istart; ii<iend+1; ii++) {
1057 	      for (jj=jstart; jj<jend+1; jj++) {
1058 		for (kk=kstart; kk<kend+1; kk++) {
1059 		  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1060 		    cols[cnt++]  = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1061 		  }
1062 		}
1063 	      }
1064 	    }
1065 	    rows[l]      = l + nc*(slot);
1066 	  }
1067 	  ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1068 	}
1069       }
1070     }
1071     ierr = PetscFree(values);CHKERRQ(ierr);
1072     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1073     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1074   }
1075   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1076   PetscFunctionReturn(0);
1077 }
1078 
1079 /* ---------------------------------------------------------------------------------*/
1080 
1081 #undef __FUNCT__
1082 #define __FUNCT__ "DMGetMatrix_DA_1d_MPIAIJ"
1083 PetscErrorCode DMGetMatrix_DA_1d_MPIAIJ(DM da,Mat J)
1084 {
1085   PetscErrorCode         ierr;
1086   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1087   PetscInt               m,dim,s,*cols = PETSC_NULL,nc,*rows = PETSC_NULL,col,cnt,l;
1088   PetscInt               istart,iend;
1089   PetscScalar            *values;
1090   DMDABoundaryType       bx;
1091   ISLocalToGlobalMapping ltog,ltogb;
1092   DM_DA                  *dd = (DM_DA*)da->data;
1093 
1094   PetscFunctionBegin;
1095   /*
1096          nc - number of components per grid point
1097          col - number of colors needed in one direction for single component problem
1098 
1099   */
1100   ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
1101   col    = 2*s + 1;
1102 
1103   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
1104   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
1105 
1106   ierr = MatSeqAIJSetPreallocation(J,col*nc,0);CHKERRQ(ierr);
1107   ierr = MatMPIAIJSetPreallocation(J,col*nc,0,col*nc,0);CHKERRQ(ierr);
1108   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1109   ierr = PetscMalloc2(nc,PetscInt,&rows,col*nc*nc,PetscInt,&cols);CHKERRQ(ierr);
1110 
1111   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1112   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1113   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1114   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1115 
1116   /*
1117     For each node in the grid: we get the neighbors in the local (on processor ordering
1118     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1119     PETSc ordering.
1120   */
1121   if (!da->prealloc_only) {
1122     ierr = PetscMalloc(col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1123     ierr = PetscMemzero(values,col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1124     for (i=xs; i<xs+nx; i++) {
1125       istart = PetscMax(-s,gxs - i);
1126       iend   = PetscMin(s,gxs + gnx - i - 1);
1127       slot   = i - gxs;
1128 
1129       cnt  = 0;
1130       for (l=0; l<nc; l++) {
1131 	for (i1=istart; i1<iend+1; i1++) {
1132 	  cols[cnt++] = l + nc*(slot + i1);
1133 	}
1134 	rows[l]      = l + nc*(slot);
1135       }
1136       ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1137     }
1138     ierr = PetscFree(values);CHKERRQ(ierr);
1139     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1140     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1141   }
1142   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1143   PetscFunctionReturn(0);
1144 }
1145 
1146 #undef __FUNCT__
1147 #define __FUNCT__ "DMGetMatrix_DA_2d_MPIBAIJ"
1148 PetscErrorCode DMGetMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
1149 {
1150   PetscErrorCode         ierr;
1151   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1152   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1153   PetscInt               istart,iend,jstart,jend,ii,jj;
1154   MPI_Comm               comm;
1155   PetscScalar            *values;
1156   DMDABoundaryType       bx,by;
1157   DMDAStencilType        st;
1158   ISLocalToGlobalMapping ltog,ltogb;
1159   DM_DA                  *dd = (DM_DA*)da->data;
1160 
1161   PetscFunctionBegin;
1162   /*
1163      nc - number of components per grid point
1164      col - number of colors needed in one direction for single component problem
1165   */
1166   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1167   col = 2*s + 1;
1168 
1169   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1170   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1171   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1172 
1173   ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1174 
1175   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1176   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1177 
1178   /* determine the matrix preallocation information */
1179   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1180   for (i=xs; i<xs+nx; i++) {
1181     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1182     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1183     for (j=ys; j<ys+ny; j++) {
1184       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1185       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1186       slot = i - gxs + gnx*(j - gys);
1187 
1188       /* Find block columns in block row */
1189       cnt  = 0;
1190       for (ii=istart; ii<iend+1; ii++) {
1191         for (jj=jstart; jj<jend+1; jj++) {
1192           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1193             cols[cnt++]  = slot + ii + gnx*jj;
1194           }
1195         }
1196       }
1197       ierr = MatPreallocateSetLocal(ltogb,1,&slot,ltogb,cnt,cols,dnz,onz);CHKERRQ(ierr);
1198     }
1199   }
1200   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1201   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1202   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1203 
1204   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1205   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1206 
1207   /*
1208     For each node in the grid: we get the neighbors in the local (on processor ordering
1209     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1210     PETSc ordering.
1211   */
1212   if (!da->prealloc_only) {
1213     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1214     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1215     for (i=xs; i<xs+nx; i++) {
1216       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1217       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1218       for (j=ys; j<ys+ny; j++) {
1219 	jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1220 	jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1221 	slot = i - gxs + gnx*(j - gys);
1222 	cnt  = 0;
1223         for (ii=istart; ii<iend+1; ii++) {
1224           for (jj=jstart; jj<jend+1; jj++) {
1225             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1226               cols[cnt++]  = slot + ii + gnx*jj;
1227             }
1228           }
1229         }
1230 	ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1231       }
1232     }
1233     ierr = PetscFree(values);CHKERRQ(ierr);
1234     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1235     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1236   }
1237   ierr = PetscFree(cols);CHKERRQ(ierr);
1238   PetscFunctionReturn(0);
1239 }
1240 
1241 #undef __FUNCT__
1242 #define __FUNCT__ "DMGetMatrix_DA_3d_MPIBAIJ"
1243 PetscErrorCode DMGetMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
1244 {
1245   PetscErrorCode         ierr;
1246   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1247   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1248   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1249   MPI_Comm               comm;
1250   PetscScalar            *values;
1251   DMDABoundaryType       bx,by,bz;
1252   DMDAStencilType        st;
1253   ISLocalToGlobalMapping ltog,ltogb;
1254   DM_DA                  *dd = (DM_DA*)da->data;
1255 
1256   PetscFunctionBegin;
1257   /*
1258          nc - number of components per grid point
1259          col - number of colors needed in one direction for single component problem
1260 
1261   */
1262   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1263   col    = 2*s + 1;
1264 
1265   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1266   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1267   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1268 
1269   ierr  = PetscMalloc(col*col*col*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1270 
1271   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1272   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1273 
1274   /* determine the matrix preallocation information */
1275   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1276   for (i=xs; i<xs+nx; i++) {
1277     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1278     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1279     for (j=ys; j<ys+ny; j++) {
1280       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1281       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1282       for (k=zs; k<zs+nz; k++) {
1283 	kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1284 	kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1285 
1286 	slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1287 
1288 	/* Find block columns in block row */
1289 	cnt  = 0;
1290         for (ii=istart; ii<iend+1; ii++) {
1291           for (jj=jstart; jj<jend+1; jj++) {
1292             for (kk=kstart; kk<kend+1; kk++) {
1293               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1294 		cols[cnt++]  = slot + ii + gnx*jj + gnx*gny*kk;
1295 	      }
1296 	    }
1297 	  }
1298 	}
1299 	ierr = MatPreallocateSetLocal(ltogb,1,&slot,ltogb,cnt,cols,dnz,onz);CHKERRQ(ierr);
1300       }
1301     }
1302   }
1303   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1304   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1305   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1306 
1307   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1308   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1309 
1310   /*
1311     For each node in the grid: we get the neighbors in the local (on processor ordering
1312     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1313     PETSc ordering.
1314   */
1315   if (!da->prealloc_only) {
1316     ierr  = PetscMalloc(col*col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1317     ierr  = PetscMemzero(values,col*col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1318     for (i=xs; i<xs+nx; i++) {
1319       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1320       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1321       for (j=ys; j<ys+ny; j++) {
1322 	jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1323 	jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1324 	for (k=zs; k<zs+nz; k++) {
1325 	  kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1326 	  kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1327 
1328 	  slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1329 
1330 	  cnt  = 0;
1331           for (ii=istart; ii<iend+1; ii++) {
1332             for (jj=jstart; jj<jend+1; jj++) {
1333               for (kk=kstart; kk<kend+1; kk++) {
1334                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1335                   cols[cnt++]  = slot + ii + gnx*jj + gnx*gny*kk;
1336                 }
1337               }
1338             }
1339           }
1340 	  ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1341 	}
1342       }
1343     }
1344     ierr = PetscFree(values);CHKERRQ(ierr);
1345     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1346     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1347   }
1348   ierr = PetscFree(cols);CHKERRQ(ierr);
1349   PetscFunctionReturn(0);
1350 }
1351 
1352 #undef __FUNCT__
1353 #define __FUNCT__ "L2GFilterUpperTriangular"
1354 /*
1355   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
1356   identify in the local ordering with periodic domain.
1357 */
1358 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[])
1359 {
1360   PetscErrorCode ierr;
1361   PetscInt       i,n;
1362 
1363   PetscFunctionBegin;
1364   ierr = ISLocalToGlobalMappingApply(ltog,1,row,row);CHKERRQ(ierr);
1365   ierr = ISLocalToGlobalMappingApply(ltog,*cnt,col,col);CHKERRQ(ierr);
1366   for (i=0,n=0; i<*cnt; i++) {
1367     if (col[i] >= *row) col[n++] = col[i];
1368   }
1369   *cnt = n;
1370   PetscFunctionReturn(0);
1371 }
1372 
1373 #undef __FUNCT__
1374 #define __FUNCT__ "DMGetMatrix_DA_2d_MPISBAIJ"
1375 PetscErrorCode DMGetMatrix_DA_2d_MPISBAIJ(DM da,Mat J)
1376 {
1377   PetscErrorCode         ierr;
1378   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1379   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1380   PetscInt               istart,iend,jstart,jend,ii,jj;
1381   MPI_Comm               comm;
1382   PetscScalar            *values;
1383   DMDABoundaryType       bx,by;
1384   DMDAStencilType        st;
1385   ISLocalToGlobalMapping ltog,ltogb;
1386   DM_DA                  *dd = (DM_DA*)da->data;
1387 
1388   PetscFunctionBegin;
1389   /*
1390      nc - number of components per grid point
1391      col - number of colors needed in one direction for single component problem
1392   */
1393   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1394   col = 2*s + 1;
1395 
1396   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1397   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1398   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1399 
1400   ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1401 
1402   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1403   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1404 
1405   /* determine the matrix preallocation information */
1406   ierr = MatPreallocateSymmetricInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1407   for (i=xs; i<xs+nx; i++) {
1408     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1409     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1410     for (j=ys; j<ys+ny; j++) {
1411       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1412       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1413       slot = i - gxs + gnx*(j - gys);
1414 
1415       /* Find block columns in block row */
1416       cnt  = 0;
1417       for (ii=istart; ii<iend+1; ii++) {
1418         for (jj=jstart; jj<jend+1; jj++) {
1419           if (st == DMDA_STENCIL_BOX || !ii || !jj) {
1420             cols[cnt++]  = slot + ii + gnx*jj;
1421           }
1422         }
1423       }
1424       ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1425       ierr = MatPreallocateSymmetricSet(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
1426     }
1427   }
1428   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1429   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1430   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1431 
1432   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1433   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1434 
1435   /*
1436     For each node in the grid: we get the neighbors in the local (on processor ordering
1437     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1438     PETSc ordering.
1439   */
1440   if (!da->prealloc_only) {
1441     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1442     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1443     for (i=xs; i<xs+nx; i++) {
1444       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1445       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1446       for (j=ys; j<ys+ny; j++) {
1447         jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1448         jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1449         slot = i - gxs + gnx*(j - gys);
1450 
1451         /* Find block columns in block row */
1452         cnt  = 0;
1453         for (ii=istart; ii<iend+1; ii++) {
1454           for (jj=jstart; jj<jend+1; jj++) {
1455             if (st == DMDA_STENCIL_BOX || !ii || !jj) {
1456               cols[cnt++]  = slot + ii + gnx*jj;
1457             }
1458           }
1459         }
1460         ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1461 	ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1462       }
1463     }
1464     ierr = PetscFree(values);CHKERRQ(ierr);
1465     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1466     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1467   }
1468   ierr = PetscFree(cols);CHKERRQ(ierr);
1469   PetscFunctionReturn(0);
1470 }
1471 
1472 #undef __FUNCT__
1473 #define __FUNCT__ "DMGetMatrix_DA_3d_MPISBAIJ"
1474 PetscErrorCode DMGetMatrix_DA_3d_MPISBAIJ(DM da,Mat J)
1475 {
1476   PetscErrorCode         ierr;
1477   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1478   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1479   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1480   MPI_Comm               comm;
1481   PetscScalar            *values;
1482   DMDABoundaryType       bx,by,bz;
1483   DMDAStencilType        st;
1484   ISLocalToGlobalMapping ltog,ltogb;
1485   DM_DA                  *dd = (DM_DA*)da->data;
1486 
1487   PetscFunctionBegin;
1488   /*
1489      nc - number of components per grid point
1490      col - number of colors needed in one direction for single component problem
1491   */
1492   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1493   col = 2*s + 1;
1494 
1495   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1496   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1497   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1498 
1499   /* create the matrix */
1500   ierr = PetscMalloc(col*col*col*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1501 
1502   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1503   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1504 
1505   /* determine the matrix preallocation information */
1506   ierr = MatPreallocateSymmetricInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1507   for (i=xs; i<xs+nx; i++) {
1508     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1509     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1510     for (j=ys; j<ys+ny; j++) {
1511       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1512       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1513       for (k=zs; k<zs+nz; k++) {
1514         kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1515 	kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1516 
1517 	slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1518 
1519 	/* Find block columns in block row */
1520 	cnt  = 0;
1521         for (ii=istart; ii<iend+1; ii++) {
1522           for (jj=jstart; jj<jend+1; jj++) {
1523             for (kk=kstart; kk<kend+1; kk++) {
1524               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
1525                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1526               }
1527             }
1528           }
1529         }
1530         ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1531         ierr = MatPreallocateSymmetricSet(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
1532       }
1533     }
1534   }
1535   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1536   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1537   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1538 
1539   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1540   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1541 
1542   /*
1543     For each node in the grid: we get the neighbors in the local (on processor ordering
1544     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1545     PETSc ordering.
1546   */
1547   if (!da->prealloc_only) {
1548     ierr = PetscMalloc(col*col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1549     ierr = PetscMemzero(values,col*col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1550     for (i=xs; i<xs+nx; i++) {
1551       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1552       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1553       for (j=ys; j<ys+ny; j++) {
1554         jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1555 	jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1556 	for (k=zs; k<zs+nz; k++) {
1557           kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1558 	  kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1559 
1560 	  slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1561 
1562 	  cnt  = 0;
1563           for (ii=istart; ii<iend+1; ii++) {
1564             for (jj=jstart; jj<jend+1; jj++) {
1565               for (kk=kstart; kk<kend+1; kk++) {
1566                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
1567 		  cols[cnt++]  = slot + ii + gnx*jj + gnx*gny*kk;
1568 		}
1569 	      }
1570 	    }
1571 	  }
1572           ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1573           ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1574 	}
1575       }
1576     }
1577     ierr = PetscFree(values);CHKERRQ(ierr);
1578     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1579     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1580   }
1581   ierr = PetscFree(cols);CHKERRQ(ierr);
1582   PetscFunctionReturn(0);
1583 }
1584 
1585 /* ---------------------------------------------------------------------------------*/
1586 
1587 #undef __FUNCT__
1588 #define __FUNCT__ "DMGetMatrix_DA_3d_MPIAIJ_Fill"
1589 PetscErrorCode DMGetMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J)
1590 {
1591   PetscErrorCode         ierr;
1592   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1593   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,l,p,*dnz,*onz;
1594   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1595   DM_DA                  *dd = (DM_DA*)da->data;
1596   PetscInt               ifill_col,*dfill = dd->dfill,*ofill = dd->ofill;
1597   MPI_Comm               comm;
1598   PetscScalar            *values;
1599   DMDABoundaryType       bx,by,bz;
1600   ISLocalToGlobalMapping ltog,ltogb;
1601   DMDAStencilType        st;
1602 
1603   PetscFunctionBegin;
1604   /*
1605          nc - number of components per grid point
1606          col - number of colors needed in one direction for single component problem
1607 
1608   */
1609   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1610   col    = 2*s + 1;
1611   if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)){
1612     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
1613                  by 2*stencil_width + 1\n");
1614   }
1615   if (by == DMDA_BOUNDARY_PERIODIC && (n % col)){
1616     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
1617                  by 2*stencil_width + 1\n");
1618   }
1619   if (bz == DMDA_BOUNDARY_PERIODIC && (p % col)){
1620     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
1621                  by 2*stencil_width + 1\n");
1622   }
1623 
1624   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1625   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1626   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1627 
1628   ierr = PetscMalloc(col*col*col*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1629   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1630   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1631 
1632   /* determine the matrix preallocation information */
1633   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1634 
1635 
1636   for (i=xs; i<xs+nx; i++) {
1637     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1638     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1639     for (j=ys; j<ys+ny; j++) {
1640       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1641       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1642       for (k=zs; k<zs+nz; k++) {
1643         kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1644         kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1645 
1646         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1647 
1648 	for (l=0; l<nc; l++) {
1649 	  cnt  = 0;
1650 	  for (ii=istart; ii<iend+1; ii++) {
1651 	    for (jj=jstart; jj<jend+1; jj++) {
1652 	      for (kk=kstart; kk<kend+1; kk++) {
1653 		if (ii || jj || kk) {
1654 		  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1655 		    for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++)
1656 		      cols[cnt++]  = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1657 		  }
1658 		} else {
1659 		  if (dfill) {
1660 		    for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++)
1661 		      cols[cnt++]  = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1662 		  } else {
1663 		    for (ifill_col=0; ifill_col<nc; ifill_col++)
1664 		      cols[cnt++]  = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1665 		  }
1666 		}
1667 	      }
1668 	    }
1669 	  }
1670 	  row  = l + nc*(slot);
1671 	  ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1672 	}
1673       }
1674     }
1675   }
1676   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1677   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1678   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1679   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1680   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1681 
1682   /*
1683     For each node in the grid: we get the neighbors in the local (on processor ordering
1684     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1685     PETSc ordering.
1686   */
1687   if (!da->prealloc_only) {
1688     ierr = PetscMalloc(col*col*col*nc*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1689     ierr = PetscMemzero(values,col*col*col*nc*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1690     for (i=xs; i<xs+nx; i++) {
1691       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1692       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1693       for (j=ys; j<ys+ny; j++) {
1694 	jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1695 	jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1696 	for (k=zs; k<zs+nz; k++) {
1697 	  kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1698 	  kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1699 
1700 	  slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1701 
1702 	  for (l=0; l<nc; l++) {
1703 	    cnt  = 0;
1704 	    for (ii=istart; ii<iend+1; ii++) {
1705 	      for (jj=jstart; jj<jend+1; jj++) {
1706 		for (kk=kstart; kk<kend+1; kk++) {
1707 		  if (ii || jj || kk) {
1708 		    if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1709 		      for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++)
1710 			cols[cnt++]  = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1711 		    }
1712 		  } else {
1713 		    if (dfill) {
1714 		      for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++)
1715 			cols[cnt++]  = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1716 		    } else {
1717 		      for (ifill_col=0; ifill_col<nc; ifill_col++)
1718 			cols[cnt++]  = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1719 		    }
1720 		  }
1721 		}
1722 	      }
1723 	    }
1724 	    row  = l + nc*(slot);
1725 	    ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1726 	  }
1727 	}
1728       }
1729     }
1730     ierr = PetscFree(values);CHKERRQ(ierr);
1731     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1732     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1733   }
1734   ierr = PetscFree(cols);CHKERRQ(ierr);
1735   PetscFunctionReturn(0);
1736 }
1737