xref: /petsc/src/dm/impls/da/fdda.c (revision 7b6bb2c608b6fc6714ef38fda02c2dbb91c82665)
1 
2 #include <private/daimpl.h> /*I      "petscdmda.h"     I*/
3 #include <petscmat.h>         /*I      "petscmat.h"    I*/
4 #include <private/matimpl.h>
5 
6 extern PetscErrorCode DMGetColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring *);
7 extern PetscErrorCode DMGetColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring *);
8 extern PetscErrorCode DMGetColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring *);
9 extern PetscErrorCode DMGetColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring *);
10 
11 /*
12    For ghost i that may be negative or greater than the upper bound this
13   maps it into the 0:m-1 range using periodicity
14 */
15 #define SetInRange(i,m) ((i < 0) ? m+i:((i >= m) ? i-m:i))
16 
17 #undef __FUNCT__
18 #define __FUNCT__ "DMDASetBlockFills_Private"
19 static PetscErrorCode DMDASetBlockFills_Private(PetscInt *dfill,PetscInt w,PetscInt **rfill)
20 {
21   PetscErrorCode ierr;
22   PetscInt       i,j,nz,*fill;
23 
24   PetscFunctionBegin;
25   if (!dfill) PetscFunctionReturn(0);
26 
27   /* count number nonzeros */
28   nz = 0;
29   for (i=0; i<w; i++) {
30     for (j=0; j<w; j++) {
31       if (dfill[w*i+j]) nz++;
32     }
33   }
34   ierr = PetscMalloc((nz + w + 1)*sizeof(PetscInt),&fill);CHKERRQ(ierr);
35   /* construct modified CSR storage of nonzero structure */
36   nz = w + 1;
37   for (i=0; i<w; i++) {
38     fill[i] = nz;
39     for (j=0; j<w; j++) {
40       if (dfill[w*i+j]) {
41 	fill[nz] = j;
42 	nz++;
43       }
44     }
45   }
46   fill[w] = nz;
47 
48   *rfill = fill;
49   PetscFunctionReturn(0);
50 }
51 
52 #undef __FUNCT__
53 #define __FUNCT__ "DMDASetBlockFills"
54 /*@
55     DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
56     of the matrix returned by DMGetMatrix().
57 
58     Logically Collective on DMDA
59 
60     Input Parameter:
61 +   da - the distributed array
62 .   dfill - the fill pattern in the diagonal block (may be PETSC_NULL, means use dense block)
63 -   ofill - the fill pattern in the off-diagonal blocks
64 
65 
66     Level: developer
67 
68     Notes: This only makes sense when you are doing multicomponent problems but using the
69        MPIAIJ matrix format
70 
71            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
72        representing coupling and 0 entries for missing coupling. For example
73 $             dfill[9] = {1, 0, 0,
74 $                         1, 1, 0,
75 $                         0, 1, 1}
76        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
77        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
78        diagonal block).
79 
80      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
81      can be represented in the dfill, ofill format
82 
83    Contributed by Glenn Hammond
84 
85 .seealso DMGetMatrix(), DMDASetGetMatrix(), DMDASetMatPreallocateOnly()
86 
87 @*/
88 PetscErrorCode  DMDASetBlockFills(DM da,PetscInt *dfill,PetscInt *ofill)
89 {
90   DM_DA          *dd = (DM_DA*)da->data;
91   PetscErrorCode ierr;
92 
93   PetscFunctionBegin;
94   ierr = DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);CHKERRQ(ierr);
95   ierr = DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);CHKERRQ(ierr);
96   PetscFunctionReturn(0);
97 }
98 
99 
100 #undef __FUNCT__
101 #define __FUNCT__ "DMGetColoring_DA"
102 PetscErrorCode  DMGetColoring_DA(DM da,ISColoringType ctype,const MatType mtype,ISColoring *coloring)
103 {
104   PetscErrorCode   ierr;
105   PetscInt         dim,m,n,p,nc;
106   DMDABoundaryType bx,by,bz;
107   MPI_Comm         comm;
108   PetscMPIInt      size;
109   PetscBool        isBAIJ;
110   DM_DA            *dd = (DM_DA*)da->data;
111 
112   PetscFunctionBegin;
113   /*
114                                   m
115           ------------------------------------------------------
116          |                                                     |
117          |                                                     |
118          |               ----------------------                |
119          |               |                    |                |
120       n  |           yn  |                    |                |
121          |               |                    |                |
122          |               .---------------------                |
123          |             (xs,ys)     xn                          |
124          |            .                                        |
125          |         (gxs,gys)                                   |
126          |                                                     |
127           -----------------------------------------------------
128   */
129 
130   /*
131          nc - number of components per grid point
132          col - number of colors needed in one direction for single component problem
133 
134   */
135   ierr = DMDAGetInfo(da,&dim,0,0,0,&m,&n,&p,&nc,0,&bx,&by,&bz,0);CHKERRQ(ierr);
136 
137   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
138   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
139   if (ctype == IS_COLORING_GHOSTED){
140     if (size == 1) {
141       ctype = IS_COLORING_GLOBAL;
142     } else if (dim > 1){
143       if ((m==1 && bx == DMDA_BOUNDARY_PERIODIC) || (n==1 && by == DMDA_BOUNDARY_PERIODIC) || (p==1 && bz == DMDA_BOUNDARY_PERIODIC)){
144         SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"IS_COLORING_GHOSTED cannot be used for periodic boundary condition having both ends of the domain  on the same process");
145       }
146     }
147   }
148 
149   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
150      matrices is for the blocks, not the individual matrix elements  */
151   ierr = PetscStrcmp(mtype,MATBAIJ,&isBAIJ);CHKERRQ(ierr);
152   if (!isBAIJ) {ierr = PetscStrcmp(mtype,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);}
153   if (!isBAIJ) {ierr = PetscStrcmp(mtype,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);}
154   if (isBAIJ) {
155     dd->w = 1;
156     dd->xs = dd->xs/nc;
157     dd->xe = dd->xe/nc;
158     dd->Xs = dd->Xs/nc;
159     dd->Xe = dd->Xe/nc;
160   }
161 
162   /*
163      We do not provide a getcoloring function in the DMDA operations because
164    the basic DMDA does not know about matrices. We think of DMDA as being more
165    more low-level then matrices.
166   */
167   if (dim == 1) {
168     ierr = DMGetColoring_DA_1d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
169   } else if (dim == 2) {
170     ierr =  DMGetColoring_DA_2d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
171   } else if (dim == 3) {
172     ierr =  DMGetColoring_DA_3d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
173   } else {
174     SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
175   }
176   if (isBAIJ) {
177     dd->w = nc;
178     dd->xs = dd->xs*nc;
179     dd->xe = dd->xe*nc;
180     dd->Xs = dd->Xs*nc;
181     dd->Xe = dd->Xe*nc;
182   }
183   PetscFunctionReturn(0);
184 }
185 
186 /* ---------------------------------------------------------------------------------*/
187 
188 #undef __FUNCT__
189 #define __FUNCT__ "DMGetColoring_DA_2d_MPIAIJ"
190 PetscErrorCode DMGetColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
191 {
192   PetscErrorCode         ierr;
193   PetscInt               xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
194   PetscInt               ncolors;
195   MPI_Comm               comm;
196   DMDABoundaryType       bx,by;
197   DMDAStencilType        st;
198   ISColoringValue        *colors;
199   DM_DA                  *dd = (DM_DA*)da->data;
200 
201   PetscFunctionBegin;
202   /*
203          nc - number of components per grid point
204          col - number of colors needed in one direction for single component problem
205 
206   */
207   ierr = DMDAGetInfo(da,&dim,&m,&n,0,&M,&N,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
208   col    = 2*s + 1;
209   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
210   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
211   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
212 
213   /* special case as taught to us by Paul Hovland */
214   if (st == DMDA_STENCIL_STAR && s == 1) {
215     ierr = DMGetColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
216   } else {
217 
218     if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)){
219       SETERRQ2(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X (%d) is divisible\n\
220                  by 2*stencil_width + 1 (%d)\n", m, col);
221     }
222     if (by == DMDA_BOUNDARY_PERIODIC && (n % col)){
223       SETERRQ2(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y (%d) is divisible\n\
224                  by 2*stencil_width + 1 (%d)\n", n, col);
225     }
226     if (ctype == IS_COLORING_GLOBAL) {
227       if (!dd->localcoloring) {
228 	ierr = PetscMalloc(nc*nx*ny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
229 	ii = 0;
230 	for (j=ys; j<ys+ny; j++) {
231 	  for (i=xs; i<xs+nx; i++) {
232 	    for (k=0; k<nc; k++) {
233 	      colors[ii++] = k + nc*((i % col) + col*(j % col));
234 	    }
235 	  }
236 	}
237         ncolors = nc + nc*(col-1 + col*(col-1));
238 	ierr = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,&dd->localcoloring);CHKERRQ(ierr);
239       }
240       *coloring = dd->localcoloring;
241     } else if (ctype == IS_COLORING_GHOSTED) {
242       if (!dd->ghostedcoloring) {
243 	ierr = PetscMalloc(nc*gnx*gny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
244 	ii = 0;
245 	for (j=gys; j<gys+gny; j++) {
246 	  for (i=gxs; i<gxs+gnx; i++) {
247 	    for (k=0; k<nc; k++) {
248 	      /* the complicated stuff is to handle periodic boundaries */
249 	      colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
250 	    }
251 	  }
252 	}
253         ncolors = nc + nc*(col - 1 + col*(col-1));
254 	ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
255         /* PetscIntView(ncolors,(PetscInt *)colors,0); */
256 
257 	ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
258       }
259       *coloring = dd->ghostedcoloring;
260     } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
261   }
262   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
263   PetscFunctionReturn(0);
264 }
265 
266 /* ---------------------------------------------------------------------------------*/
267 
268 #undef __FUNCT__
269 #define __FUNCT__ "DMGetColoring_DA_3d_MPIAIJ"
270 PetscErrorCode DMGetColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
271 {
272   PetscErrorCode    ierr;
273   PetscInt          xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
274   PetscInt          ncolors;
275   MPI_Comm          comm;
276   DMDABoundaryType  bx,by,bz;
277   DMDAStencilType   st;
278   ISColoringValue   *colors;
279   DM_DA             *dd = (DM_DA*)da->data;
280 
281   PetscFunctionBegin;
282   /*
283          nc - number of components per grid point
284          col - number of colors needed in one direction for single component problem
285 
286   */
287   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
288   col    = 2*s + 1;
289   if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)){
290     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
291                  by 2*stencil_width + 1\n");
292   }
293   if (by == DMDA_BOUNDARY_PERIODIC && (n % col)){
294     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
295                  by 2*stencil_width + 1\n");
296   }
297   if (bz == DMDA_BOUNDARY_PERIODIC && (p % col)){
298     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
299                  by 2*stencil_width + 1\n");
300   }
301 
302   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
303   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
304   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
305 
306   /* create the coloring */
307   if (ctype == IS_COLORING_GLOBAL) {
308     if (!dd->localcoloring) {
309       ierr = PetscMalloc(nc*nx*ny*nz*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
310       ii = 0;
311       for (k=zs; k<zs+nz; k++) {
312         for (j=ys; j<ys+ny; j++) {
313           for (i=xs; i<xs+nx; i++) {
314             for (l=0; l<nc; l++) {
315               colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
316             }
317           }
318         }
319       }
320       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
321       ierr = ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,&dd->localcoloring);CHKERRQ(ierr);
322     }
323     *coloring = dd->localcoloring;
324   } else if (ctype == IS_COLORING_GHOSTED) {
325     if (!dd->ghostedcoloring) {
326       ierr = PetscMalloc(nc*gnx*gny*gnz*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
327       ii = 0;
328       for (k=gzs; k<gzs+gnz; k++) {
329         for (j=gys; j<gys+gny; j++) {
330           for (i=gxs; i<gxs+gnx; i++) {
331             for (l=0; l<nc; l++) {
332               /* the complicated stuff is to handle periodic boundaries */
333               colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
334             }
335           }
336         }
337       }
338       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
339       ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
340       ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
341     }
342     *coloring = dd->ghostedcoloring;
343   } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
344   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
345   PetscFunctionReturn(0);
346 }
347 
348 /* ---------------------------------------------------------------------------------*/
349 
350 #undef __FUNCT__
351 #define __FUNCT__ "DMGetColoring_DA_1d_MPIAIJ"
352 PetscErrorCode DMGetColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
353 {
354   PetscErrorCode    ierr;
355   PetscInt          xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
356   PetscInt          ncolors;
357   MPI_Comm          comm;
358   DMDABoundaryType  bx;
359   ISColoringValue   *colors;
360   DM_DA             *dd = (DM_DA*)da->data;
361 
362   PetscFunctionBegin;
363   /*
364          nc - number of components per grid point
365          col - number of colors needed in one direction for single component problem
366 
367   */
368   ierr = DMDAGetInfo(da,&dim,&m,0,0,&M,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
369   col    = 2*s + 1;
370 
371   if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)) {
372     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points is divisible\n\
373                  by 2*stencil_width + 1\n");
374   }
375 
376   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
377   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
378   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
379 
380   /* create the coloring */
381   if (ctype == IS_COLORING_GLOBAL) {
382     if (!dd->localcoloring) {
383       ierr = PetscMalloc(nc*nx*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
384       i1 = 0;
385       for (i=xs; i<xs+nx; i++) {
386         for (l=0; l<nc; l++) {
387           colors[i1++] = l + nc*(i % col);
388         }
389       }
390       ncolors = nc + nc*(col-1);
391       ierr = ISColoringCreate(comm,ncolors,nc*nx,colors,&dd->localcoloring);CHKERRQ(ierr);
392     }
393     *coloring = dd->localcoloring;
394   } else if (ctype == IS_COLORING_GHOSTED) {
395     if (!dd->ghostedcoloring) {
396       ierr = PetscMalloc(nc*gnx*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
397       i1 = 0;
398       for (i=gxs; i<gxs+gnx; i++) {
399         for (l=0; l<nc; l++) {
400           /* the complicated stuff is to handle periodic boundaries */
401           colors[i1++] = l + nc*(SetInRange(i,m) % col);
402         }
403       }
404       ncolors = nc + nc*(col-1);
405       ierr = ISColoringCreate(comm,ncolors,nc*gnx,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
406       ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
407     }
408     *coloring = dd->ghostedcoloring;
409   } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
410   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
411   PetscFunctionReturn(0);
412 }
413 
414 #undef __FUNCT__
415 #define __FUNCT__ "DMGetColoring_DA_2d_5pt_MPIAIJ"
416 PetscErrorCode DMGetColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
417 {
418   PetscErrorCode    ierr;
419   PetscInt          xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
420   PetscInt          ncolors;
421   MPI_Comm          comm;
422   DMDABoundaryType  bx,by;
423   ISColoringValue   *colors;
424   DM_DA             *dd = (DM_DA*)da->data;
425 
426   PetscFunctionBegin;
427   /*
428          nc - number of components per grid point
429          col - number of colors needed in one direction for single component problem
430 
431   */
432   ierr   = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,0);CHKERRQ(ierr);
433   ierr   = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
434   ierr   = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
435   ierr   = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
436 
437   if (bx == DMDA_BOUNDARY_PERIODIC && (m % 5)){
438     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
439                  by 5\n");
440   }
441   if (by == DMDA_BOUNDARY_PERIODIC && (n % 5)){
442     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
443                  by 5\n");
444   }
445 
446   /* create the coloring */
447   if (ctype == IS_COLORING_GLOBAL) {
448     if (!dd->localcoloring) {
449       ierr = PetscMalloc(nc*nx*ny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
450       ii = 0;
451       for (j=ys; j<ys+ny; j++) {
452 	for (i=xs; i<xs+nx; i++) {
453 	  for (k=0; k<nc; k++) {
454 	    colors[ii++] = k + nc*((3*j+i) % 5);
455 	  }
456 	}
457       }
458       ncolors = 5*nc;
459       ierr = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,&dd->localcoloring);CHKERRQ(ierr);
460     }
461     *coloring = dd->localcoloring;
462   } else if (ctype == IS_COLORING_GHOSTED) {
463     if (!dd->ghostedcoloring) {
464       ierr = PetscMalloc(nc*gnx*gny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
465       ii = 0;
466       for (j=gys; j<gys+gny; j++) {
467 	for (i=gxs; i<gxs+gnx; i++) {
468 	  for (k=0; k<nc; k++) {
469 	    colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
470 	  }
471 	}
472       }
473       ncolors = 5*nc;
474       ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
475       ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
476     }
477     *coloring = dd->ghostedcoloring;
478   } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
479   PetscFunctionReturn(0);
480 }
481 
482 /* =========================================================================== */
483 extern PetscErrorCode DMGetMatrix_DA_1d_MPIAIJ(DM,Mat);
484 extern PetscErrorCode DMGetMatrix_DA_2d_MPIAIJ(DM,Mat);
485 extern PetscErrorCode DMGetMatrix_DA_2d_MPIAIJ_Fill(DM,Mat);
486 extern PetscErrorCode DMGetMatrix_DA_3d_MPIAIJ(DM,Mat);
487 extern PetscErrorCode DMGetMatrix_DA_3d_MPIAIJ_Fill(DM,Mat);
488 extern PetscErrorCode DMGetMatrix_DA_2d_MPIBAIJ(DM,Mat);
489 extern PetscErrorCode DMGetMatrix_DA_3d_MPIBAIJ(DM,Mat);
490 extern PetscErrorCode DMGetMatrix_DA_2d_MPISBAIJ(DM,Mat);
491 extern PetscErrorCode DMGetMatrix_DA_3d_MPISBAIJ(DM,Mat);
492 
493 #undef __FUNCT__
494 #define __FUNCT__ "MatSetDM"
495 /*@
496    MatSetDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix
497 
498    Logically Collective on Mat
499 
500    Input Parameters:
501 +  mat - the matrix
502 -  da - the da
503 
504    Level: intermediate
505 
506 @*/
507 PetscErrorCode  MatSetDM(Mat mat,DM da)
508 {
509   PetscErrorCode ierr;
510 
511   PetscFunctionBegin;
512   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
513   PetscValidHeaderSpecific(da,DM_CLASSID,1);
514   ierr = PetscTryMethod(mat,"MatSetDM_C",(Mat,DM),(mat,da));CHKERRQ(ierr);
515   PetscFunctionReturn(0);
516 }
517 
518 EXTERN_C_BEGIN
519 #undef __FUNCT__
520 #define __FUNCT__ "MatView_MPI_DA"
521 PetscErrorCode  MatView_MPI_DA(Mat A,PetscViewer viewer)
522 {
523   DM             da;
524   PetscErrorCode ierr;
525   const char     *prefix;
526   Mat            Anatural;
527   AO             ao;
528   PetscInt       rstart,rend,*petsc,i;
529   IS             is;
530   MPI_Comm       comm;
531 
532   PetscFunctionBegin;
533   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
534   ierr = PetscObjectQuery((PetscObject)A,"DM",(PetscObject*)&da);CHKERRQ(ierr);
535   if (!da) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
536 
537   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
538   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
539   ierr = PetscMalloc((rend-rstart)*sizeof(PetscInt),&petsc);CHKERRQ(ierr);
540   for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
541   ierr = AOApplicationToPetsc(ao,rend-rstart,petsc);CHKERRQ(ierr);
542   ierr = ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
543 
544   /* call viewer on natural ordering */
545   ierr = MatGetSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);CHKERRQ(ierr);
546   ierr = ISDestroy(&is);CHKERRQ(ierr);
547   ierr = PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);CHKERRQ(ierr);
548   ierr = PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);CHKERRQ(ierr);
549   ierr = PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);CHKERRQ(ierr);
550   ierr = MatView(Anatural,viewer);CHKERRQ(ierr);
551   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
552   PetscFunctionReturn(0);
553 }
554 EXTERN_C_END
555 
556 EXTERN_C_BEGIN
557 #undef __FUNCT__
558 #define __FUNCT__ "MatLoad_MPI_DA"
559 PetscErrorCode  MatLoad_MPI_DA(Mat A,PetscViewer viewer)
560 {
561   DM             da;
562   PetscErrorCode ierr;
563   Mat            Anatural,Aapp;
564   AO             ao;
565   PetscInt       rstart,rend,*app,i;
566   IS             is;
567   MPI_Comm       comm;
568 
569   PetscFunctionBegin;
570   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
571   ierr = PetscObjectQuery((PetscObject)A,"DM",(PetscObject*)&da);CHKERRQ(ierr);
572   if (!da) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
573 
574   /* Load the matrix in natural ordering */
575   ierr = MatCreate(((PetscObject)A)->comm,&Anatural);CHKERRQ(ierr);
576   ierr = MatSetType(Anatural,((PetscObject)A)->type_name);CHKERRQ(ierr);
577   ierr = MatSetSizes(Anatural,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
578   ierr = MatLoad(Anatural,viewer);CHKERRQ(ierr);
579 
580   /* Map natural ordering to application ordering and create IS */
581   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
582   ierr = MatGetOwnershipRange(Anatural,&rstart,&rend);CHKERRQ(ierr);
583   ierr = PetscMalloc((rend-rstart)*sizeof(PetscInt),&app);CHKERRQ(ierr);
584   for (i=rstart; i<rend; i++) app[i-rstart] = i;
585   ierr = AOPetscToApplication(ao,rend-rstart,app);CHKERRQ(ierr);
586   ierr = ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
587 
588   /* Do permutation and replace header */
589   ierr = MatGetSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);CHKERRQ(ierr);
590   ierr = MatHeaderReplace(A,Aapp);CHKERRQ(ierr);
591   ierr = ISDestroy(&is);CHKERRQ(ierr);
592   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
593   PetscFunctionReturn(0);
594 }
595 EXTERN_C_END
596 
597 #undef __FUNCT__
598 #define __FUNCT__ "DMGetMatrix_DA"
599 PetscErrorCode  DMGetMatrix_DA(DM da, const MatType mtype,Mat *J)
600 {
601   PetscErrorCode ierr;
602   PetscInt       dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
603   Mat            A;
604   MPI_Comm       comm;
605   const MatType  Atype;
606   void           (*aij)(void)=PETSC_NULL,(*baij)(void)=PETSC_NULL,(*sbaij)(void)=PETSC_NULL;
607   MatType        ttype[256];
608   PetscBool      flg;
609   PetscMPIInt    size;
610   DM_DA          *dd = (DM_DA*)da->data;
611 
612   PetscFunctionBegin;
613 #ifndef PETSC_USE_DYNAMIC_LIBRARIES
614   ierr = MatInitializePackage(PETSC_NULL);CHKERRQ(ierr);
615 #endif
616   if (!mtype) mtype = MATAIJ;
617   ierr = PetscStrcpy((char*)ttype,mtype);CHKERRQ(ierr);
618   ierr = PetscOptionsBegin(((PetscObject)da)->comm,((PetscObject)da)->prefix,"DMDA options","Mat");CHKERRQ(ierr);
619   ierr = PetscOptionsList("-da_mat_type","Matrix type","MatSetType",MatList,mtype,(char*)ttype,256,&flg);CHKERRQ(ierr);
620   ierr = PetscOptionsEnd();
621 
622   /*
623                                   m
624           ------------------------------------------------------
625          |                                                     |
626          |                                                     |
627          |               ----------------------                |
628          |               |                    |                |
629       n  |           ny  |                    |                |
630          |               |                    |                |
631          |               .---------------------                |
632          |             (xs,ys)     nx                          |
633          |            .                                        |
634          |         (gxs,gys)                                   |
635          |                                                     |
636           -----------------------------------------------------
637   */
638 
639   /*
640          nc - number of components per grid point
641          col - number of colors needed in one direction for single component problem
642 
643   */
644   ierr = DMDAGetInfo(da,&dim,&M,&N,&P,0,0,0,&dof,0,0,0,0,0);CHKERRQ(ierr);
645   ierr = DMDAGetCorners(da,0,0,0,&nx,&ny,&nz);CHKERRQ(ierr);
646   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
647   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
648   ierr = MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);CHKERRQ(ierr);
649   ierr = MatSetType(A,(const MatType)ttype);CHKERRQ(ierr);
650   ierr = MatSetDM(A,da);CHKERRQ(ierr);
651   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
652   ierr = MatGetType(A,&Atype);CHKERRQ(ierr);
653   /*
654      We do not provide a getmatrix function in the DMDA operations because
655    the basic DMDA does not know about matrices. We think of DMDA as being more
656    more low-level than matrices. This is kind of cheating but, cause sometimes
657    we think of DMDA has higher level than matrices.
658 
659      We could switch based on Atype (or mtype), but we do not since the
660    specialized setting routines depend only the particular preallocation
661    details of the matrix, not the type itself.
662   */
663   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
664   if (!aij) {
665     ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
666   }
667   if (!aij) {
668     ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
669     if (!baij) {
670       ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
671     }
672     if (!baij){
673       ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
674       if (!sbaij) {
675         ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
676       }
677     }
678   }
679   if (aij) {
680     if (dim == 1) {
681       ierr = DMGetMatrix_DA_1d_MPIAIJ(da,A);CHKERRQ(ierr);
682     } else if (dim == 2) {
683       if (dd->ofill) {
684         ierr = DMGetMatrix_DA_2d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
685       } else {
686         ierr = DMGetMatrix_DA_2d_MPIAIJ(da,A);CHKERRQ(ierr);
687       }
688     } else if (dim == 3) {
689       if (dd->ofill) {
690         ierr = DMGetMatrix_DA_3d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
691       } else {
692         ierr = DMGetMatrix_DA_3d_MPIAIJ(da,A);CHKERRQ(ierr);
693       }
694     }
695   } else if (baij) {
696     if (dim == 2) {
697       ierr = DMGetMatrix_DA_2d_MPIBAIJ(da,A);CHKERRQ(ierr);
698     } else if (dim == 3) {
699       ierr = DMGetMatrix_DA_3d_MPIBAIJ(da,A);CHKERRQ(ierr);
700     } else {
701       SETERRQ3(((PetscObject)da)->comm,PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension!\n" \
702 	       "Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
703     }
704   } else if (sbaij) {
705     if (dim == 2) {
706       ierr = DMGetMatrix_DA_2d_MPISBAIJ(da,A);CHKERRQ(ierr);
707     } else if (dim == 3) {
708       ierr = DMGetMatrix_DA_3d_MPISBAIJ(da,A);CHKERRQ(ierr);
709     } else {
710       SETERRQ3(((PetscObject)da)->comm,PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension!\n" \
711 	       "Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
712     }
713   }
714   ierr = DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);CHKERRQ(ierr);
715   ierr = MatSetStencil(A,dim,dims,starts,dof);CHKERRQ(ierr);
716   ierr = PetscObjectCompose((PetscObject)A,"DM",(PetscObject)da);CHKERRQ(ierr);
717   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
718   if (size > 1) {
719     /* change viewer to display matrix in natural ordering */
720     ierr = MatShellSetOperation(A, MATOP_VIEW, (void (*)(void)) MatView_MPI_DA);CHKERRQ(ierr);
721     ierr = MatShellSetOperation(A, MATOP_LOAD, (void (*)(void)) MatLoad_MPI_DA);CHKERRQ(ierr);
722   }
723   *J = A;
724   PetscFunctionReturn(0);
725 }
726 
727 /* ---------------------------------------------------------------------------------*/
728 #undef __FUNCT__
729 #define __FUNCT__ "DMGetMatrix_DA_2d_MPIAIJ"
730 PetscErrorCode DMGetMatrix_DA_2d_MPIAIJ(DM da,Mat J)
731 {
732   PetscErrorCode         ierr;
733   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = PETSC_NULL,k,nc,*rows = PETSC_NULL,col,cnt,l,p;
734   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
735   MPI_Comm               comm;
736   PetscScalar            *values;
737   DMDABoundaryType       bx,by;
738   ISLocalToGlobalMapping ltog,ltogb;
739   DMDAStencilType        st;
740 
741   PetscFunctionBegin;
742   /*
743          nc - number of components per grid point
744          col - number of colors needed in one direction for single component problem
745 
746   */
747   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
748   col = 2*s + 1;
749   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
750   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
751   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
752 
753   ierr = PetscMalloc2(nc,PetscInt,&rows,col*col*nc*nc,PetscInt,&cols);CHKERRQ(ierr);
754   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
755   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
756 
757   /* determine the matrix preallocation information */
758   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
759   for (i=xs; i<xs+nx; i++) {
760 
761     pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
762     pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
763 
764     for (j=ys; j<ys+ny; j++) {
765       slot = i - gxs + gnx*(j - gys);
766 
767       lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
768       lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
769 
770       cnt  = 0;
771       for (k=0; k<nc; k++) {
772 	for (l=lstart; l<lend+1; l++) {
773 	  for (p=pstart; p<pend+1; p++) {
774 	    if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
775 	      cols[cnt++]  = k + nc*(slot + gnx*l + p);
776 	    }
777 	  }
778 	}
779 	rows[k] = k + nc*(slot);
780       }
781       ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
782     }
783   }
784   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
785   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
786   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
787   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
788 
789   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
790   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
791 
792   /*
793     For each node in the grid: we get the neighbors in the local (on processor ordering
794     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
795     PETSc ordering.
796   */
797   if (!da->prealloc_only) {
798     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
799     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
800     for (i=xs; i<xs+nx; i++) {
801 
802       pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
803       pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
804 
805       for (j=ys; j<ys+ny; j++) {
806 	slot = i - gxs + gnx*(j - gys);
807 
808 	lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
809 	lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
810 
811 	cnt  = 0;
812 	for (k=0; k<nc; k++) {
813 	  for (l=lstart; l<lend+1; l++) {
814 	    for (p=pstart; p<pend+1; p++) {
815 	      if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
816 		cols[cnt++]  = k + nc*(slot + gnx*l + p);
817 	      }
818 	    }
819 	  }
820 	  rows[k]      = k + nc*(slot);
821 	}
822 	ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
823       }
824     }
825     ierr = PetscFree(values);CHKERRQ(ierr);
826     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
827     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
828   }
829   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
830   PetscFunctionReturn(0);
831 }
832 
833 #undef __FUNCT__
834 #define __FUNCT__ "DMGetMatrix_DA_2d_MPIAIJ_Fill"
835 PetscErrorCode DMGetMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)
836 {
837   PetscErrorCode         ierr;
838   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
839   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,l,p;
840   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
841   DM_DA                  *dd = (DM_DA*)da->data;
842   PetscInt               ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
843   MPI_Comm               comm;
844   PetscScalar            *values;
845   DMDABoundaryType       bx,by;
846   ISLocalToGlobalMapping ltog,ltogb;
847   DMDAStencilType        st;
848 
849   PetscFunctionBegin;
850   /*
851          nc - number of components per grid point
852          col - number of colors needed in one direction for single component problem
853 
854   */
855   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
856   col = 2*s + 1;
857   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
858   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
859   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
860 
861   ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
862   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
863   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
864 
865   /* determine the matrix preallocation information */
866   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
867   for (i=xs; i<xs+nx; i++) {
868 
869     pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
870     pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
871 
872     for (j=ys; j<ys+ny; j++) {
873       slot = i - gxs + gnx*(j - gys);
874 
875       lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
876       lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
877 
878       for (k=0; k<nc; k++) {
879         cnt  = 0;
880 	for (l=lstart; l<lend+1; l++) {
881 	  for (p=pstart; p<pend+1; p++) {
882             if (l || p) {
883 	      if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
884                 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++)
885 		  cols[cnt++]  = ofill[ifill_col] + nc*(slot + gnx*l + p);
886 	      }
887             } else {
888 	      if (dfill) {
889 		for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++)
890 		  cols[cnt++]  = dfill[ifill_col] + nc*(slot + gnx*l + p);
891 	      } else {
892 		for (ifill_col=0; ifill_col<nc; ifill_col++)
893 		  cols[cnt++]  = ifill_col + nc*(slot + gnx*l + p);
894 	      }
895             }
896 	  }
897 	}
898 	row = k + nc*(slot);
899         ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
900       }
901     }
902   }
903   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
904   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
905   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
906   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
907   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
908 
909   /*
910     For each node in the grid: we get the neighbors in the local (on processor ordering
911     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
912     PETSc ordering.
913   */
914   if (!da->prealloc_only) {
915     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
916     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
917     for (i=xs; i<xs+nx; i++) {
918 
919       pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
920       pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
921 
922       for (j=ys; j<ys+ny; j++) {
923 	slot = i - gxs + gnx*(j - gys);
924 
925 	lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
926 	lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
927 
928 	for (k=0; k<nc; k++) {
929 	  cnt  = 0;
930 	  for (l=lstart; l<lend+1; l++) {
931 	    for (p=pstart; p<pend+1; p++) {
932 	      if (l || p) {
933 		if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
934 		  for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++)
935 		    cols[cnt++]  = ofill[ifill_col] + nc*(slot + gnx*l + p);
936 		}
937 	      } else {
938 		if (dfill) {
939 		  for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++)
940 		    cols[cnt++]  = dfill[ifill_col] + nc*(slot + gnx*l + p);
941 		} else {
942 		  for (ifill_col=0; ifill_col<nc; ifill_col++)
943 		    cols[cnt++]  = ifill_col + nc*(slot + gnx*l + p);
944 		}
945 	      }
946 	    }
947 	  }
948 	  row  = k + nc*(slot);
949 	  ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
950 	}
951       }
952     }
953     ierr = PetscFree(values);CHKERRQ(ierr);
954     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
955     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
956   }
957   ierr = PetscFree(cols);CHKERRQ(ierr);
958   PetscFunctionReturn(0);
959 }
960 
961 /* ---------------------------------------------------------------------------------*/
962 
963 #undef __FUNCT__
964 #define __FUNCT__ "DMGetMatrix_DA_3d_MPIAIJ"
965 PetscErrorCode DMGetMatrix_DA_3d_MPIAIJ(DM da,Mat J)
966 {
967   PetscErrorCode         ierr;
968   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
969   PetscInt               m,n,dim,s,*cols = PETSC_NULL,k,nc,*rows = PETSC_NULL,col,cnt,l,p,*dnz = PETSC_NULL,*onz = PETSC_NULL;
970   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
971   MPI_Comm               comm;
972   PetscScalar            *values;
973   DMDABoundaryType       bx,by,bz;
974   ISLocalToGlobalMapping ltog,ltogb;
975   DMDAStencilType        st;
976 
977   PetscFunctionBegin;
978   /*
979          nc - number of components per grid point
980          col - number of colors needed in one direction for single component problem
981 
982   */
983   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
984   col    = 2*s + 1;
985 
986   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
987   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
988   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
989 
990   ierr = PetscMalloc2(nc,PetscInt,&rows,col*col*col*nc*nc,PetscInt,&cols);CHKERRQ(ierr);
991   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
992   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
993 
994   /* determine the matrix preallocation information */
995   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
996   for (i=xs; i<xs+nx; i++) {
997     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
998     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
999     for (j=ys; j<ys+ny; j++) {
1000       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1001       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1002       for (k=zs; k<zs+nz; k++) {
1003 	kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1004 	kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1005 
1006 	slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1007 
1008 	cnt  = 0;
1009 	for (l=0; l<nc; l++) {
1010 	  for (ii=istart; ii<iend+1; ii++) {
1011 	    for (jj=jstart; jj<jend+1; jj++) {
1012 	      for (kk=kstart; kk<kend+1; kk++) {
1013 		if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1014 		  cols[cnt++]  = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1015 		}
1016 	      }
1017 	    }
1018 	  }
1019 	  rows[l] = l + nc*(slot);
1020 	}
1021 	ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1022       }
1023     }
1024   }
1025   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1026   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1027   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1028   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1029   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1030   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1031 
1032   /*
1033     For each node in the grid: we get the neighbors in the local (on processor ordering
1034     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1035     PETSc ordering.
1036   */
1037   if (!da->prealloc_only) {
1038     ierr = PetscMalloc(col*col*col*nc*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1039     ierr = PetscMemzero(values,col*col*col*nc*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1040     for (i=xs; i<xs+nx; i++) {
1041       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1042       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1043       for (j=ys; j<ys+ny; j++) {
1044 	jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1045 	jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1046 	for (k=zs; k<zs+nz; k++) {
1047 	  kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1048 	  kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1049 
1050 	  slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1051 
1052 	  cnt  = 0;
1053 	  for (l=0; l<nc; l++) {
1054 	    for (ii=istart; ii<iend+1; ii++) {
1055 	      for (jj=jstart; jj<jend+1; jj++) {
1056 		for (kk=kstart; kk<kend+1; kk++) {
1057 		  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1058 		    cols[cnt++]  = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1059 		  }
1060 		}
1061 	      }
1062 	    }
1063 	    rows[l]      = l + nc*(slot);
1064 	  }
1065 	  ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1066 	}
1067       }
1068     }
1069     ierr = PetscFree(values);CHKERRQ(ierr);
1070     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1071     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1072   }
1073   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1074   PetscFunctionReturn(0);
1075 }
1076 
1077 /* ---------------------------------------------------------------------------------*/
1078 
1079 #undef __FUNCT__
1080 #define __FUNCT__ "DMGetMatrix_DA_1d_MPIAIJ"
1081 PetscErrorCode DMGetMatrix_DA_1d_MPIAIJ(DM da,Mat J)
1082 {
1083   PetscErrorCode         ierr;
1084   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1085   PetscInt               m,dim,s,*cols = PETSC_NULL,nc,*rows = PETSC_NULL,col,cnt,l;
1086   PetscInt               istart,iend;
1087   PetscScalar            *values;
1088   DMDABoundaryType       bx;
1089   ISLocalToGlobalMapping ltog,ltogb;
1090 
1091   PetscFunctionBegin;
1092   /*
1093          nc - number of components per grid point
1094          col - number of colors needed in one direction for single component problem
1095 
1096   */
1097   ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
1098   col    = 2*s + 1;
1099 
1100   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
1101   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
1102 
1103   ierr = MatSeqAIJSetPreallocation(J,col*nc,0);CHKERRQ(ierr);
1104   ierr = MatMPIAIJSetPreallocation(J,col*nc,0,col*nc,0);CHKERRQ(ierr);
1105   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1106   ierr = PetscMalloc2(nc,PetscInt,&rows,col*nc*nc,PetscInt,&cols);CHKERRQ(ierr);
1107 
1108   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1109   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1110   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1111   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1112 
1113   /*
1114     For each node in the grid: we get the neighbors in the local (on processor ordering
1115     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1116     PETSc ordering.
1117   */
1118   if (!da->prealloc_only) {
1119     ierr = PetscMalloc(col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1120     ierr = PetscMemzero(values,col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1121     for (i=xs; i<xs+nx; i++) {
1122       istart = PetscMax(-s,gxs - i);
1123       iend   = PetscMin(s,gxs + gnx - i - 1);
1124       slot   = i - gxs;
1125 
1126       cnt  = 0;
1127       for (l=0; l<nc; l++) {
1128 	for (i1=istart; i1<iend+1; i1++) {
1129 	  cols[cnt++] = l + nc*(slot + i1);
1130 	}
1131 	rows[l]      = l + nc*(slot);
1132       }
1133       ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1134     }
1135     ierr = PetscFree(values);CHKERRQ(ierr);
1136     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1137     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1138   }
1139   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1140   PetscFunctionReturn(0);
1141 }
1142 
1143 #undef __FUNCT__
1144 #define __FUNCT__ "DMGetMatrix_DA_2d_MPIBAIJ"
1145 PetscErrorCode DMGetMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
1146 {
1147   PetscErrorCode         ierr;
1148   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1149   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1150   PetscInt               istart,iend,jstart,jend,ii,jj;
1151   MPI_Comm               comm;
1152   PetscScalar            *values;
1153   DMDABoundaryType       bx,by;
1154   DMDAStencilType        st;
1155   ISLocalToGlobalMapping ltog,ltogb;
1156 
1157   PetscFunctionBegin;
1158   /*
1159      nc - number of components per grid point
1160      col - number of colors needed in one direction for single component problem
1161   */
1162   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1163   col = 2*s + 1;
1164 
1165   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1166   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1167   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1168 
1169   ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1170 
1171   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1172   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1173 
1174   /* determine the matrix preallocation information */
1175   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1176   for (i=xs; i<xs+nx; i++) {
1177     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1178     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1179     for (j=ys; j<ys+ny; j++) {
1180       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1181       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1182       slot = i - gxs + gnx*(j - gys);
1183 
1184       /* Find block columns in block row */
1185       cnt  = 0;
1186       for (ii=istart; ii<iend+1; ii++) {
1187         for (jj=jstart; jj<jend+1; jj++) {
1188           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1189             cols[cnt++]  = slot + ii + gnx*jj;
1190           }
1191         }
1192       }
1193       ierr = MatPreallocateSetLocal(ltogb,1,&slot,ltogb,cnt,cols,dnz,onz);CHKERRQ(ierr);
1194     }
1195   }
1196   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1197   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1198   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1199 
1200   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1201   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1202 
1203   /*
1204     For each node in the grid: we get the neighbors in the local (on processor ordering
1205     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1206     PETSc ordering.
1207   */
1208   if (!da->prealloc_only) {
1209     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1210     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1211     for (i=xs; i<xs+nx; i++) {
1212       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1213       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1214       for (j=ys; j<ys+ny; j++) {
1215 	jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1216 	jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1217 	slot = i - gxs + gnx*(j - gys);
1218 	cnt  = 0;
1219         for (ii=istart; ii<iend+1; ii++) {
1220           for (jj=jstart; jj<jend+1; jj++) {
1221             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1222               cols[cnt++]  = slot + ii + gnx*jj;
1223             }
1224           }
1225         }
1226 	ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1227       }
1228     }
1229     ierr = PetscFree(values);CHKERRQ(ierr);
1230     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1231     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1232   }
1233   ierr = PetscFree(cols);CHKERRQ(ierr);
1234   PetscFunctionReturn(0);
1235 }
1236 
1237 #undef __FUNCT__
1238 #define __FUNCT__ "DMGetMatrix_DA_3d_MPIBAIJ"
1239 PetscErrorCode DMGetMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
1240 {
1241   PetscErrorCode         ierr;
1242   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1243   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1244   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1245   MPI_Comm               comm;
1246   PetscScalar            *values;
1247   DMDABoundaryType       bx,by,bz;
1248   DMDAStencilType        st;
1249   ISLocalToGlobalMapping ltog,ltogb;
1250 
1251   PetscFunctionBegin;
1252   /*
1253          nc - number of components per grid point
1254          col - number of colors needed in one direction for single component problem
1255 
1256   */
1257   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1258   col    = 2*s + 1;
1259 
1260   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1261   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1262   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1263 
1264   ierr  = PetscMalloc(col*col*col*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1265 
1266   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1267   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1268 
1269   /* determine the matrix preallocation information */
1270   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1271   for (i=xs; i<xs+nx; i++) {
1272     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1273     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1274     for (j=ys; j<ys+ny; j++) {
1275       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1276       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1277       for (k=zs; k<zs+nz; k++) {
1278 	kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1279 	kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1280 
1281 	slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1282 
1283 	/* Find block columns in block row */
1284 	cnt  = 0;
1285         for (ii=istart; ii<iend+1; ii++) {
1286           for (jj=jstart; jj<jend+1; jj++) {
1287             for (kk=kstart; kk<kend+1; kk++) {
1288               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1289 		cols[cnt++]  = slot + ii + gnx*jj + gnx*gny*kk;
1290 	      }
1291 	    }
1292 	  }
1293 	}
1294 	ierr = MatPreallocateSetLocal(ltogb,1,&slot,ltogb,cnt,cols,dnz,onz);CHKERRQ(ierr);
1295       }
1296     }
1297   }
1298   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1299   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1300   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1301 
1302   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1303   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1304 
1305   /*
1306     For each node in the grid: we get the neighbors in the local (on processor ordering
1307     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1308     PETSc ordering.
1309   */
1310   if (!da->prealloc_only) {
1311     ierr  = PetscMalloc(col*col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1312     ierr  = PetscMemzero(values,col*col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1313     for (i=xs; i<xs+nx; i++) {
1314       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1315       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1316       for (j=ys; j<ys+ny; j++) {
1317 	jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1318 	jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1319 	for (k=zs; k<zs+nz; k++) {
1320 	  kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1321 	  kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1322 
1323 	  slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1324 
1325 	  cnt  = 0;
1326           for (ii=istart; ii<iend+1; ii++) {
1327             for (jj=jstart; jj<jend+1; jj++) {
1328               for (kk=kstart; kk<kend+1; kk++) {
1329                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1330                   cols[cnt++]  = slot + ii + gnx*jj + gnx*gny*kk;
1331                 }
1332               }
1333             }
1334           }
1335 	  ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1336 	}
1337       }
1338     }
1339     ierr = PetscFree(values);CHKERRQ(ierr);
1340     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1341     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1342   }
1343   ierr = PetscFree(cols);CHKERRQ(ierr);
1344   PetscFunctionReturn(0);
1345 }
1346 
1347 #undef __FUNCT__
1348 #define __FUNCT__ "L2GFilterUpperTriangular"
1349 /*
1350   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
1351   identify in the local ordering with periodic domain.
1352 */
1353 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[])
1354 {
1355   PetscErrorCode ierr;
1356   PetscInt       i,n;
1357 
1358   PetscFunctionBegin;
1359   ierr = ISLocalToGlobalMappingApply(ltog,1,row,row);CHKERRQ(ierr);
1360   ierr = ISLocalToGlobalMappingApply(ltog,*cnt,col,col);CHKERRQ(ierr);
1361   for (i=0,n=0; i<*cnt; i++) {
1362     if (col[i] >= *row) col[n++] = col[i];
1363   }
1364   *cnt = n;
1365   PetscFunctionReturn(0);
1366 }
1367 
1368 #undef __FUNCT__
1369 #define __FUNCT__ "DMGetMatrix_DA_2d_MPISBAIJ"
1370 PetscErrorCode DMGetMatrix_DA_2d_MPISBAIJ(DM da,Mat J)
1371 {
1372   PetscErrorCode         ierr;
1373   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1374   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1375   PetscInt               istart,iend,jstart,jend,ii,jj;
1376   MPI_Comm               comm;
1377   PetscScalar            *values;
1378   DMDABoundaryType       bx,by;
1379   DMDAStencilType        st;
1380   ISLocalToGlobalMapping ltog,ltogb;
1381 
1382   PetscFunctionBegin;
1383   /*
1384      nc - number of components per grid point
1385      col - number of colors needed in one direction for single component problem
1386   */
1387   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1388   col = 2*s + 1;
1389 
1390   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1391   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1392   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1393 
1394   ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1395 
1396   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1397   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1398 
1399   /* determine the matrix preallocation information */
1400   ierr = MatPreallocateSymmetricInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1401   for (i=xs; i<xs+nx; i++) {
1402     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1403     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1404     for (j=ys; j<ys+ny; j++) {
1405       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1406       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1407       slot = i - gxs + gnx*(j - gys);
1408 
1409       /* Find block columns in block row */
1410       cnt  = 0;
1411       for (ii=istart; ii<iend+1; ii++) {
1412         for (jj=jstart; jj<jend+1; jj++) {
1413           if (st == DMDA_STENCIL_BOX || !ii || !jj) {
1414             cols[cnt++]  = slot + ii + gnx*jj;
1415           }
1416         }
1417       }
1418       ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1419       ierr = MatPreallocateSymmetricSet(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
1420     }
1421   }
1422   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1423   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1424   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1425 
1426   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1427   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1428 
1429   /*
1430     For each node in the grid: we get the neighbors in the local (on processor ordering
1431     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1432     PETSc ordering.
1433   */
1434   if (!da->prealloc_only) {
1435     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1436     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1437     for (i=xs; i<xs+nx; i++) {
1438       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1439       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1440       for (j=ys; j<ys+ny; j++) {
1441         jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1442         jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1443         slot = i - gxs + gnx*(j - gys);
1444 
1445         /* Find block columns in block row */
1446         cnt  = 0;
1447         for (ii=istart; ii<iend+1; ii++) {
1448           for (jj=jstart; jj<jend+1; jj++) {
1449             if (st == DMDA_STENCIL_BOX || !ii || !jj) {
1450               cols[cnt++]  = slot + ii + gnx*jj;
1451             }
1452           }
1453         }
1454         ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1455 	ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1456       }
1457     }
1458     ierr = PetscFree(values);CHKERRQ(ierr);
1459     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1460     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1461   }
1462   ierr = PetscFree(cols);CHKERRQ(ierr);
1463   PetscFunctionReturn(0);
1464 }
1465 
1466 #undef __FUNCT__
1467 #define __FUNCT__ "DMGetMatrix_DA_3d_MPISBAIJ"
1468 PetscErrorCode DMGetMatrix_DA_3d_MPISBAIJ(DM da,Mat J)
1469 {
1470   PetscErrorCode         ierr;
1471   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1472   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1473   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1474   MPI_Comm               comm;
1475   PetscScalar            *values;
1476   DMDABoundaryType       bx,by,bz;
1477   DMDAStencilType        st;
1478   ISLocalToGlobalMapping ltog,ltogb;
1479 
1480   PetscFunctionBegin;
1481   /*
1482      nc - number of components per grid point
1483      col - number of colors needed in one direction for single component problem
1484   */
1485   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1486   col = 2*s + 1;
1487 
1488   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1489   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1490   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1491 
1492   /* create the matrix */
1493   ierr = PetscMalloc(col*col*col*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1494 
1495   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1496   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1497 
1498   /* determine the matrix preallocation information */
1499   ierr = MatPreallocateSymmetricInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1500   for (i=xs; i<xs+nx; i++) {
1501     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1502     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1503     for (j=ys; j<ys+ny; j++) {
1504       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1505       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1506       for (k=zs; k<zs+nz; k++) {
1507         kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1508 	kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1509 
1510 	slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1511 
1512 	/* Find block columns in block row */
1513 	cnt  = 0;
1514         for (ii=istart; ii<iend+1; ii++) {
1515           for (jj=jstart; jj<jend+1; jj++) {
1516             for (kk=kstart; kk<kend+1; kk++) {
1517               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
1518                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1519               }
1520             }
1521           }
1522         }
1523         ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1524         ierr = MatPreallocateSymmetricSet(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
1525       }
1526     }
1527   }
1528   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1529   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1530   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1531 
1532   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1533   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1534 
1535   /*
1536     For each node in the grid: we get the neighbors in the local (on processor ordering
1537     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1538     PETSc ordering.
1539   */
1540   if (!da->prealloc_only) {
1541     ierr = PetscMalloc(col*col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1542     ierr = PetscMemzero(values,col*col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1543     for (i=xs; i<xs+nx; i++) {
1544       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1545       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1546       for (j=ys; j<ys+ny; j++) {
1547         jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1548 	jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1549 	for (k=zs; k<zs+nz; k++) {
1550           kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1551 	  kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1552 
1553 	  slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1554 
1555 	  cnt  = 0;
1556           for (ii=istart; ii<iend+1; ii++) {
1557             for (jj=jstart; jj<jend+1; jj++) {
1558               for (kk=kstart; kk<kend+1; kk++) {
1559                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
1560 		  cols[cnt++]  = slot + ii + gnx*jj + gnx*gny*kk;
1561 		}
1562 	      }
1563 	    }
1564 	  }
1565           ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1566           ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1567 	}
1568       }
1569     }
1570     ierr = PetscFree(values);CHKERRQ(ierr);
1571     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1572     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1573   }
1574   ierr = PetscFree(cols);CHKERRQ(ierr);
1575   PetscFunctionReturn(0);
1576 }
1577 
1578 /* ---------------------------------------------------------------------------------*/
1579 
1580 #undef __FUNCT__
1581 #define __FUNCT__ "DMGetMatrix_DA_3d_MPIAIJ_Fill"
1582 PetscErrorCode DMGetMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J)
1583 {
1584   PetscErrorCode         ierr;
1585   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1586   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,l,p,*dnz,*onz;
1587   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1588   DM_DA                  *dd = (DM_DA*)da->data;
1589   PetscInt               ifill_col,*dfill = dd->dfill,*ofill = dd->ofill;
1590   MPI_Comm               comm;
1591   PetscScalar            *values;
1592   DMDABoundaryType       bx,by,bz;
1593   ISLocalToGlobalMapping ltog,ltogb;
1594   DMDAStencilType        st;
1595 
1596   PetscFunctionBegin;
1597   /*
1598          nc - number of components per grid point
1599          col - number of colors needed in one direction for single component problem
1600 
1601   */
1602   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1603   col    = 2*s + 1;
1604   if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)){
1605     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
1606                  by 2*stencil_width + 1\n");
1607   }
1608   if (by == DMDA_BOUNDARY_PERIODIC && (n % col)){
1609     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
1610                  by 2*stencil_width + 1\n");
1611   }
1612   if (bz == DMDA_BOUNDARY_PERIODIC && (p % col)){
1613     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
1614                  by 2*stencil_width + 1\n");
1615   }
1616 
1617   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1618   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1619   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1620 
1621   ierr = PetscMalloc(col*col*col*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1622   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1623   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1624 
1625   /* determine the matrix preallocation information */
1626   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1627 
1628 
1629   for (i=xs; i<xs+nx; i++) {
1630     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1631     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1632     for (j=ys; j<ys+ny; j++) {
1633       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1634       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1635       for (k=zs; k<zs+nz; k++) {
1636         kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1637         kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1638 
1639         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1640 
1641 	for (l=0; l<nc; l++) {
1642 	  cnt  = 0;
1643 	  for (ii=istart; ii<iend+1; ii++) {
1644 	    for (jj=jstart; jj<jend+1; jj++) {
1645 	      for (kk=kstart; kk<kend+1; kk++) {
1646 		if (ii || jj || kk) {
1647 		  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1648 		    for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++)
1649 		      cols[cnt++]  = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1650 		  }
1651 		} else {
1652 		  if (dfill) {
1653 		    for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++)
1654 		      cols[cnt++]  = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1655 		  } else {
1656 		    for (ifill_col=0; ifill_col<nc; ifill_col++)
1657 		      cols[cnt++]  = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1658 		  }
1659 		}
1660 	      }
1661 	    }
1662 	  }
1663 	  row  = l + nc*(slot);
1664 	  ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1665 	}
1666       }
1667     }
1668   }
1669   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1670   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1671   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1672   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1673   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1674 
1675   /*
1676     For each node in the grid: we get the neighbors in the local (on processor ordering
1677     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1678     PETSc ordering.
1679   */
1680   if (!da->prealloc_only) {
1681     ierr = PetscMalloc(col*col*col*nc*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1682     ierr = PetscMemzero(values,col*col*col*nc*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1683     for (i=xs; i<xs+nx; i++) {
1684       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1685       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1686       for (j=ys; j<ys+ny; j++) {
1687 	jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1688 	jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1689 	for (k=zs; k<zs+nz; k++) {
1690 	  kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1691 	  kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1692 
1693 	  slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1694 
1695 	  for (l=0; l<nc; l++) {
1696 	    cnt  = 0;
1697 	    for (ii=istart; ii<iend+1; ii++) {
1698 	      for (jj=jstart; jj<jend+1; jj++) {
1699 		for (kk=kstart; kk<kend+1; kk++) {
1700 		  if (ii || jj || kk) {
1701 		    if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1702 		      for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++)
1703 			cols[cnt++]  = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1704 		    }
1705 		  } else {
1706 		    if (dfill) {
1707 		      for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++)
1708 			cols[cnt++]  = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1709 		    } else {
1710 		      for (ifill_col=0; ifill_col<nc; ifill_col++)
1711 			cols[cnt++]  = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1712 		    }
1713 		  }
1714 		}
1715 	      }
1716 	    }
1717 	    row  = l + nc*(slot);
1718 	    ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1719 	  }
1720 	}
1721       }
1722     }
1723     ierr = PetscFree(values);CHKERRQ(ierr);
1724     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1725     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1726   }
1727   ierr = PetscFree(cols);CHKERRQ(ierr);
1728   PetscFunctionReturn(0);
1729 }
1730