xref: /petsc/src/dm/impls/da/fdda.c (revision 247e2d9283ff1bbf8950108a11f1a3a3a92a3dd5) !
1 
2 #include <private/daimpl.h> /*I      "petscdmda.h"     I*/
3 #include <petscmat.h>         /*I      "petscmat.h"    I*/
4 #include <private/matimpl.h>
5 
6 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring *);
7 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring *);
8 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring *);
9 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring *);
10 
11 /*
12    For ghost i that may be negative or greater than the upper bound this
13   maps it into the 0:m-1 range using periodicity
14 */
15 #define SetInRange(i,m) ((i < 0) ? m+i:((i >= m) ? i-m:i))
16 
17 #undef __FUNCT__
18 #define __FUNCT__ "DMDASetBlockFills_Private"
19 static PetscErrorCode DMDASetBlockFills_Private(PetscInt *dfill,PetscInt w,PetscInt **rfill)
20 {
21   PetscErrorCode ierr;
22   PetscInt       i,j,nz,*fill;
23 
24   PetscFunctionBegin;
25   if (!dfill) PetscFunctionReturn(0);
26 
27   /* count number nonzeros */
28   nz = 0;
29   for (i=0; i<w; i++) {
30     for (j=0; j<w; j++) {
31       if (dfill[w*i+j]) nz++;
32     }
33   }
34   ierr = PetscMalloc((nz + w + 1)*sizeof(PetscInt),&fill);CHKERRQ(ierr);
35   /* construct modified CSR storage of nonzero structure */
36   nz = w + 1;
37   for (i=0; i<w; i++) {
38     fill[i] = nz;
39     for (j=0; j<w; j++) {
40       if (dfill[w*i+j]) {
41 	fill[nz] = j;
42 	nz++;
43       }
44     }
45   }
46   fill[w] = nz;
47 
48   *rfill = fill;
49   PetscFunctionReturn(0);
50 }
51 
52 #undef __FUNCT__
53 #define __FUNCT__ "DMDASetBlockFills"
54 /*@
55     DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
56     of the matrix returned by DMCreateMatrix().
57 
58     Logically Collective on DMDA
59 
60     Input Parameter:
61 +   da - the distributed array
62 .   dfill - the fill pattern in the diagonal block (may be PETSC_NULL, means use dense block)
63 -   ofill - the fill pattern in the off-diagonal blocks
64 
65 
66     Level: developer
67 
68     Notes: This only makes sense when you are doing multicomponent problems but using the
69        MPIAIJ matrix format
70 
71            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
72        representing coupling and 0 entries for missing coupling. For example
73 $             dfill[9] = {1, 0, 0,
74 $                         1, 1, 0,
75 $                         0, 1, 1}
76        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
77        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
78        diagonal block).
79 
80      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
81      can be represented in the dfill, ofill format
82 
83    Contributed by Glenn Hammond
84 
85 .seealso DMCreateMatrix(), DMDASetGetMatrix(), DMDASetMatPreallocateOnly()
86 
87 @*/
88 PetscErrorCode  DMDASetBlockFills(DM da,PetscInt *dfill,PetscInt *ofill)
89 {
90   DM_DA          *dd = (DM_DA*)da->data;
91   PetscErrorCode ierr;
92 
93   PetscFunctionBegin;
94   ierr = DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);CHKERRQ(ierr);
95   ierr = DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);CHKERRQ(ierr);
96   PetscFunctionReturn(0);
97 }
98 
99 
100 #undef __FUNCT__
101 #define __FUNCT__ "DMCreateColoring_DA"
102 PetscErrorCode  DMCreateColoring_DA(DM da,ISColoringType ctype,const MatType mtype,ISColoring *coloring)
103 {
104   PetscErrorCode   ierr;
105   PetscInt         dim,m,n,p,nc;
106   DMDABoundaryType bx,by,bz;
107   MPI_Comm         comm;
108   PetscMPIInt      size;
109   PetscBool        isBAIJ;
110   DM_DA            *dd = (DM_DA*)da->data;
111 
112   PetscFunctionBegin;
113   /*
114                                   m
115           ------------------------------------------------------
116          |                                                     |
117          |                                                     |
118          |               ----------------------                |
119          |               |                    |                |
120       n  |           yn  |                    |                |
121          |               |                    |                |
122          |               .---------------------                |
123          |             (xs,ys)     xn                          |
124          |            .                                        |
125          |         (gxs,gys)                                   |
126          |                                                     |
127           -----------------------------------------------------
128   */
129 
130   /*
131          nc - number of components per grid point
132          col - number of colors needed in one direction for single component problem
133 
134   */
135   ierr = DMDAGetInfo(da,&dim,0,0,0,&m,&n,&p,&nc,0,&bx,&by,&bz,0);CHKERRQ(ierr);
136 
137   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
138   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
139   if (ctype == IS_COLORING_GHOSTED){
140     if (size == 1) {
141       ctype = IS_COLORING_GLOBAL;
142     } else if (dim > 1){
143       if ((m==1 && bx == DMDA_BOUNDARY_PERIODIC) || (n==1 && by == DMDA_BOUNDARY_PERIODIC) || (p==1 && bz == DMDA_BOUNDARY_PERIODIC)){
144         SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"IS_COLORING_GHOSTED cannot be used for periodic boundary condition having both ends of the domain  on the same process");
145       }
146     }
147   }
148 
149   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
150      matrices is for the blocks, not the individual matrix elements  */
151   ierr = PetscStrcmp(mtype,MATBAIJ,&isBAIJ);CHKERRQ(ierr);
152   if (!isBAIJ) {ierr = PetscStrcmp(mtype,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);}
153   if (!isBAIJ) {ierr = PetscStrcmp(mtype,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);}
154   if (isBAIJ) {
155     dd->w = 1;
156     dd->xs = dd->xs/nc;
157     dd->xe = dd->xe/nc;
158     dd->Xs = dd->Xs/nc;
159     dd->Xe = dd->Xe/nc;
160   }
161 
162   /*
163      We do not provide a getcoloring function in the DMDA operations because
164    the basic DMDA does not know about matrices. We think of DMDA as being more
165    more low-level then matrices.
166   */
167   if (dim == 1) {
168     ierr = DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
169   } else if (dim == 2) {
170     ierr =  DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
171   } else if (dim == 3) {
172     ierr =  DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
173   } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
174   if (isBAIJ) {
175     dd->w = nc;
176     dd->xs = dd->xs*nc;
177     dd->xe = dd->xe*nc;
178     dd->Xs = dd->Xs*nc;
179     dd->Xe = dd->Xe*nc;
180   }
181   PetscFunctionReturn(0);
182 }
183 
184 /* ---------------------------------------------------------------------------------*/
185 
186 #undef __FUNCT__
187 #define __FUNCT__ "DMCreateColoring_DA_2d_MPIAIJ"
188 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
189 {
190   PetscErrorCode         ierr;
191   PetscInt               xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
192   PetscInt               ncolors;
193   MPI_Comm               comm;
194   DMDABoundaryType       bx,by;
195   DMDAStencilType        st;
196   ISColoringValue        *colors;
197   DM_DA                  *dd = (DM_DA*)da->data;
198 
199   PetscFunctionBegin;
200   /*
201          nc - number of components per grid point
202          col - number of colors needed in one direction for single component problem
203 
204   */
205   ierr = DMDAGetInfo(da,&dim,&m,&n,0,&M,&N,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
206   col    = 2*s + 1;
207   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
208   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
209   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
210 
211   /* special case as taught to us by Paul Hovland */
212   if (st == DMDA_STENCIL_STAR && s == 1) {
213     ierr = DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
214   } else {
215 
216     if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)){
217       SETERRQ2(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X (%d) is divisible\n\
218                  by 2*stencil_width + 1 (%d)\n", m, col);
219     }
220     if (by == DMDA_BOUNDARY_PERIODIC && (n % col)){
221       SETERRQ2(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y (%d) is divisible\n\
222                  by 2*stencil_width + 1 (%d)\n", n, col);
223     }
224     if (ctype == IS_COLORING_GLOBAL) {
225       if (!dd->localcoloring) {
226 	ierr = PetscMalloc(nc*nx*ny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
227 	ii = 0;
228 	for (j=ys; j<ys+ny; j++) {
229 	  for (i=xs; i<xs+nx; i++) {
230 	    for (k=0; k<nc; k++) {
231 	      colors[ii++] = k + nc*((i % col) + col*(j % col));
232 	    }
233 	  }
234 	}
235         ncolors = nc + nc*(col-1 + col*(col-1));
236 	ierr = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,&dd->localcoloring);CHKERRQ(ierr);
237       }
238       *coloring = dd->localcoloring;
239     } else if (ctype == IS_COLORING_GHOSTED) {
240       if (!dd->ghostedcoloring) {
241 	ierr = PetscMalloc(nc*gnx*gny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
242 	ii = 0;
243 	for (j=gys; j<gys+gny; j++) {
244 	  for (i=gxs; i<gxs+gnx; i++) {
245 	    for (k=0; k<nc; k++) {
246 	      /* the complicated stuff is to handle periodic boundaries */
247 	      colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
248 	    }
249 	  }
250 	}
251         ncolors = nc + nc*(col - 1 + col*(col-1));
252 	ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
253         /* PetscIntView(ncolors,(PetscInt *)colors,0); */
254 
255 	ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
256       }
257       *coloring = dd->ghostedcoloring;
258     } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
259   }
260   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
261   PetscFunctionReturn(0);
262 }
263 
264 /* ---------------------------------------------------------------------------------*/
265 
266 #undef __FUNCT__
267 #define __FUNCT__ "DMCreateColoring_DA_3d_MPIAIJ"
268 PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
269 {
270   PetscErrorCode    ierr;
271   PetscInt          xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
272   PetscInt          ncolors;
273   MPI_Comm          comm;
274   DMDABoundaryType  bx,by,bz;
275   DMDAStencilType   st;
276   ISColoringValue   *colors;
277   DM_DA             *dd = (DM_DA*)da->data;
278 
279   PetscFunctionBegin;
280   /*
281          nc - number of components per grid point
282          col - number of colors needed in one direction for single component problem
283 
284   */
285   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
286   col    = 2*s + 1;
287   if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)){
288     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
289                  by 2*stencil_width + 1\n");
290   }
291   if (by == DMDA_BOUNDARY_PERIODIC && (n % col)){
292     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
293                  by 2*stencil_width + 1\n");
294   }
295   if (bz == DMDA_BOUNDARY_PERIODIC && (p % col)){
296     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
297                  by 2*stencil_width + 1\n");
298   }
299 
300   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
301   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
302   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
303 
304   /* create the coloring */
305   if (ctype == IS_COLORING_GLOBAL) {
306     if (!dd->localcoloring) {
307       ierr = PetscMalloc(nc*nx*ny*nz*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
308       ii = 0;
309       for (k=zs; k<zs+nz; k++) {
310         for (j=ys; j<ys+ny; j++) {
311           for (i=xs; i<xs+nx; i++) {
312             for (l=0; l<nc; l++) {
313               colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
314             }
315           }
316         }
317       }
318       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
319       ierr = ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,&dd->localcoloring);CHKERRQ(ierr);
320     }
321     *coloring = dd->localcoloring;
322   } else if (ctype == IS_COLORING_GHOSTED) {
323     if (!dd->ghostedcoloring) {
324       ierr = PetscMalloc(nc*gnx*gny*gnz*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
325       ii = 0;
326       for (k=gzs; k<gzs+gnz; k++) {
327         for (j=gys; j<gys+gny; j++) {
328           for (i=gxs; i<gxs+gnx; i++) {
329             for (l=0; l<nc; l++) {
330               /* the complicated stuff is to handle periodic boundaries */
331               colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
332             }
333           }
334         }
335       }
336       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
337       ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
338       ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
339     }
340     *coloring = dd->ghostedcoloring;
341   } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
342   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
343   PetscFunctionReturn(0);
344 }
345 
346 /* ---------------------------------------------------------------------------------*/
347 
348 #undef __FUNCT__
349 #define __FUNCT__ "DMCreateColoring_DA_1d_MPIAIJ"
350 PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
351 {
352   PetscErrorCode    ierr;
353   PetscInt          xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
354   PetscInt          ncolors;
355   MPI_Comm          comm;
356   DMDABoundaryType  bx;
357   ISColoringValue   *colors;
358   DM_DA             *dd = (DM_DA*)da->data;
359 
360   PetscFunctionBegin;
361   /*
362          nc - number of components per grid point
363          col - number of colors needed in one direction for single component problem
364 
365   */
366   ierr = DMDAGetInfo(da,&dim,&m,0,0,&M,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
367   col    = 2*s + 1;
368 
369   if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)) {
370     SETERRQ2(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points %d is divisible\n\
371                  by 2*stencil_width + 1 %d\n",(int)m,(int)col);
372   }
373 
374   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
375   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
376   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
377 
378   /* create the coloring */
379   if (ctype == IS_COLORING_GLOBAL) {
380     if (!dd->localcoloring) {
381       ierr = PetscMalloc(nc*nx*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
382       i1 = 0;
383       for (i=xs; i<xs+nx; i++) {
384         for (l=0; l<nc; l++) {
385           colors[i1++] = l + nc*(i % col);
386         }
387       }
388       ncolors = nc + nc*(col-1);
389       ierr = ISColoringCreate(comm,ncolors,nc*nx,colors,&dd->localcoloring);CHKERRQ(ierr);
390     }
391     *coloring = dd->localcoloring;
392   } else if (ctype == IS_COLORING_GHOSTED) {
393     if (!dd->ghostedcoloring) {
394       ierr = PetscMalloc(nc*gnx*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
395       i1 = 0;
396       for (i=gxs; i<gxs+gnx; i++) {
397         for (l=0; l<nc; l++) {
398           /* the complicated stuff is to handle periodic boundaries */
399           colors[i1++] = l + nc*(SetInRange(i,m) % col);
400         }
401       }
402       ncolors = nc + nc*(col-1);
403       ierr = ISColoringCreate(comm,ncolors,nc*gnx,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
404       ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
405     }
406     *coloring = dd->ghostedcoloring;
407   } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
408   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
409   PetscFunctionReturn(0);
410 }
411 
412 #undef __FUNCT__
413 #define __FUNCT__ "DMCreateColoring_DA_2d_5pt_MPIAIJ"
414 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
415 {
416   PetscErrorCode    ierr;
417   PetscInt          xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
418   PetscInt          ncolors;
419   MPI_Comm          comm;
420   DMDABoundaryType  bx,by;
421   ISColoringValue   *colors;
422   DM_DA             *dd = (DM_DA*)da->data;
423 
424   PetscFunctionBegin;
425   /*
426          nc - number of components per grid point
427          col - number of colors needed in one direction for single component problem
428 
429   */
430   ierr   = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,0);CHKERRQ(ierr);
431   ierr   = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
432   ierr   = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
433   ierr   = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
434 
435   if (bx == DMDA_BOUNDARY_PERIODIC && (m % 5)){
436     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
437                  by 5\n");
438   }
439   if (by == DMDA_BOUNDARY_PERIODIC && (n % 5)){
440     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
441                  by 5\n");
442   }
443 
444   /* create the coloring */
445   if (ctype == IS_COLORING_GLOBAL) {
446     if (!dd->localcoloring) {
447       ierr = PetscMalloc(nc*nx*ny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
448       ii = 0;
449       for (j=ys; j<ys+ny; j++) {
450 	for (i=xs; i<xs+nx; i++) {
451 	  for (k=0; k<nc; k++) {
452 	    colors[ii++] = k + nc*((3*j+i) % 5);
453 	  }
454 	}
455       }
456       ncolors = 5*nc;
457       ierr = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,&dd->localcoloring);CHKERRQ(ierr);
458     }
459     *coloring = dd->localcoloring;
460   } else if (ctype == IS_COLORING_GHOSTED) {
461     if (!dd->ghostedcoloring) {
462       ierr = PetscMalloc(nc*gnx*gny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
463       ii = 0;
464       for (j=gys; j<gys+gny; j++) {
465 	for (i=gxs; i<gxs+gnx; i++) {
466 	  for (k=0; k<nc; k++) {
467 	    colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
468 	  }
469 	}
470       }
471       ncolors = 5*nc;
472       ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
473       ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
474     }
475     *coloring = dd->ghostedcoloring;
476   } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
477   PetscFunctionReturn(0);
478 }
479 
480 /* =========================================================================== */
481 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat);
482 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat);
483 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat);
484 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat);
485 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat);
486 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat);
487 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat);
488 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat);
489 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat);
490 
491 #undef __FUNCT__
492 #define __FUNCT__ "MatSetDM"
493 /*@
494    MatSetDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix
495 
496    Logically Collective on Mat
497 
498    Input Parameters:
499 +  mat - the matrix
500 -  da - the da
501 
502    Level: intermediate
503 
504 @*/
505 PetscErrorCode  MatSetDM(Mat mat,DM da)
506 {
507   PetscErrorCode ierr;
508 
509   PetscFunctionBegin;
510   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
511   PetscValidHeaderSpecific(da,DM_CLASSID,1);
512   ierr = PetscTryMethod(mat,"MatSetDM_C",(Mat,DM),(mat,da));CHKERRQ(ierr);
513   PetscFunctionReturn(0);
514 }
515 
516 EXTERN_C_BEGIN
517 #undef __FUNCT__
518 #define __FUNCT__ "MatView_MPI_DA"
519 PetscErrorCode  MatView_MPI_DA(Mat A,PetscViewer viewer)
520 {
521   DM             da;
522   PetscErrorCode ierr;
523   const char     *prefix;
524   Mat            Anatural;
525   AO             ao;
526   PetscInt       rstart,rend,*petsc,i;
527   IS             is;
528   MPI_Comm       comm;
529 
530   PetscFunctionBegin;
531   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
532   ierr = PetscObjectQuery((PetscObject)A,"DM",(PetscObject*)&da);CHKERRQ(ierr);
533   if (!da) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
534 
535   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
536   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
537   ierr = PetscMalloc((rend-rstart)*sizeof(PetscInt),&petsc);CHKERRQ(ierr);
538   for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
539   ierr = AOApplicationToPetsc(ao,rend-rstart,petsc);CHKERRQ(ierr);
540   ierr = ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
541 
542   /* call viewer on natural ordering */
543   ierr = MatGetSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);CHKERRQ(ierr);
544   ierr = ISDestroy(&is);CHKERRQ(ierr);
545   ierr = PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);CHKERRQ(ierr);
546   ierr = PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);CHKERRQ(ierr);
547   ierr = PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);CHKERRQ(ierr);
548   ierr = MatView(Anatural,viewer);CHKERRQ(ierr);
549   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
550   PetscFunctionReturn(0);
551 }
552 EXTERN_C_END
553 
554 EXTERN_C_BEGIN
555 #undef __FUNCT__
556 #define __FUNCT__ "MatLoad_MPI_DA"
557 PetscErrorCode  MatLoad_MPI_DA(Mat A,PetscViewer viewer)
558 {
559   DM             da;
560   PetscErrorCode ierr;
561   Mat            Anatural,Aapp;
562   AO             ao;
563   PetscInt       rstart,rend,*app,i;
564   IS             is;
565   MPI_Comm       comm;
566 
567   PetscFunctionBegin;
568   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
569   ierr = PetscObjectQuery((PetscObject)A,"DM",(PetscObject*)&da);CHKERRQ(ierr);
570   if (!da) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
571 
572   /* Load the matrix in natural ordering */
573   ierr = MatCreate(((PetscObject)A)->comm,&Anatural);CHKERRQ(ierr);
574   ierr = MatSetType(Anatural,((PetscObject)A)->type_name);CHKERRQ(ierr);
575   ierr = MatSetSizes(Anatural,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
576   ierr = MatLoad(Anatural,viewer);CHKERRQ(ierr);
577 
578   /* Map natural ordering to application ordering and create IS */
579   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
580   ierr = MatGetOwnershipRange(Anatural,&rstart,&rend);CHKERRQ(ierr);
581   ierr = PetscMalloc((rend-rstart)*sizeof(PetscInt),&app);CHKERRQ(ierr);
582   for (i=rstart; i<rend; i++) app[i-rstart] = i;
583   ierr = AOPetscToApplication(ao,rend-rstart,app);CHKERRQ(ierr);
584   ierr = ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
585 
586   /* Do permutation and replace header */
587   ierr = MatGetSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);CHKERRQ(ierr);
588   ierr = MatHeaderReplace(A,Aapp);CHKERRQ(ierr);
589   ierr = ISDestroy(&is);CHKERRQ(ierr);
590   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
591   PetscFunctionReturn(0);
592 }
593 EXTERN_C_END
594 
595 #undef __FUNCT__
596 #define __FUNCT__ "DMCreateMatrix_DA"
597 PetscErrorCode DMCreateMatrix_DA(DM da, const MatType mtype,Mat *J)
598 {
599   PetscErrorCode ierr;
600   PetscInt       dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
601   Mat            A;
602   MPI_Comm       comm;
603   const MatType  Atype;
604   void           (*aij)(void)=PETSC_NULL,(*baij)(void)=PETSC_NULL,(*sbaij)(void)=PETSC_NULL;
605   MatType        ttype[256];
606   PetscBool      flg;
607   PetscMPIInt    size;
608   DM_DA          *dd = (DM_DA*)da->data;
609 
610   PetscFunctionBegin;
611 #ifndef PETSC_USE_DYNAMIC_LIBRARIES
612   ierr = MatInitializePackage(PETSC_NULL);CHKERRQ(ierr);
613 #endif
614   if (!mtype) mtype = MATAIJ;
615   ierr = PetscStrcpy((char*)ttype,mtype);CHKERRQ(ierr);
616   ierr = PetscOptionsBegin(((PetscObject)da)->comm,((PetscObject)da)->prefix,"DMDA options","Mat");CHKERRQ(ierr);
617   ierr = PetscOptionsList("-dm_mat_type","Matrix type","MatSetType",MatList,mtype,(char*)ttype,256,&flg);CHKERRQ(ierr);
618   ierr = PetscOptionsEnd();
619 
620   /*
621                                   m
622           ------------------------------------------------------
623          |                                                     |
624          |                                                     |
625          |               ----------------------                |
626          |               |                    |                |
627       n  |           ny  |                    |                |
628          |               |                    |                |
629          |               .---------------------                |
630          |             (xs,ys)     nx                          |
631          |            .                                        |
632          |         (gxs,gys)                                   |
633          |                                                     |
634           -----------------------------------------------------
635   */
636 
637   /*
638          nc - number of components per grid point
639          col - number of colors needed in one direction for single component problem
640 
641   */
642   ierr = DMDAGetInfo(da,&dim,&M,&N,&P,0,0,0,&dof,0,0,0,0,0);CHKERRQ(ierr);
643   ierr = DMDAGetCorners(da,0,0,0,&nx,&ny,&nz);CHKERRQ(ierr);
644   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
645   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
646   ierr = MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);CHKERRQ(ierr);
647   ierr = MatSetType(A,(const MatType)ttype);CHKERRQ(ierr);
648   ierr = MatSetDM(A,da);CHKERRQ(ierr);
649   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
650   ierr = MatGetType(A,&Atype);CHKERRQ(ierr);
651   /*
652      We do not provide a getmatrix function in the DMDA operations because
653    the basic DMDA does not know about matrices. We think of DMDA as being more
654    more low-level than matrices. This is kind of cheating but, cause sometimes
655    we think of DMDA has higher level than matrices.
656 
657      We could switch based on Atype (or mtype), but we do not since the
658    specialized setting routines depend only the particular preallocation
659    details of the matrix, not the type itself.
660   */
661   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
662   if (!aij) {
663     ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
664   }
665   if (!aij) {
666     ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
667     if (!baij) {
668       ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
669     }
670     if (!baij){
671       ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
672       if (!sbaij) {
673         ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
674       }
675     }
676   }
677   if (aij) {
678     if (dim == 1) {
679       ierr = DMCreateMatrix_DA_1d_MPIAIJ(da,A);CHKERRQ(ierr);
680     } else if (dim == 2) {
681       if (dd->ofill) {
682         ierr = DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
683       } else {
684         ierr = DMCreateMatrix_DA_2d_MPIAIJ(da,A);CHKERRQ(ierr);
685       }
686     } else if (dim == 3) {
687       if (dd->ofill) {
688         ierr = DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
689       } else {
690         ierr = DMCreateMatrix_DA_3d_MPIAIJ(da,A);CHKERRQ(ierr);
691       }
692     }
693   } else if (baij) {
694     if (dim == 2) {
695       ierr = DMCreateMatrix_DA_2d_MPIBAIJ(da,A);CHKERRQ(ierr);
696     } else if (dim == 3) {
697       ierr = DMCreateMatrix_DA_3d_MPIBAIJ(da,A);CHKERRQ(ierr);
698     } else {
699       SETERRQ3(((PetscObject)da)->comm,PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension!\n" \
700 	       "Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
701     }
702   } else if (sbaij) {
703     if (dim == 2) {
704       ierr = DMCreateMatrix_DA_2d_MPISBAIJ(da,A);CHKERRQ(ierr);
705     } else if (dim == 3) {
706       ierr = DMCreateMatrix_DA_3d_MPISBAIJ(da,A);CHKERRQ(ierr);
707     } else {
708       SETERRQ3(((PetscObject)da)->comm,PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension!\n" \
709 	       "Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
710     }
711   } else {
712     ISLocalToGlobalMapping ltog,ltogb;
713     ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
714     ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
715     ierr = MatSetLocalToGlobalMapping(A,ltog,ltog);CHKERRQ(ierr);
716     ierr = MatSetLocalToGlobalMappingBlock(A,ltogb,ltogb);CHKERRQ(ierr);
717   }
718   ierr = DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);CHKERRQ(ierr);
719   ierr = MatSetStencil(A,dim,dims,starts,dof);CHKERRQ(ierr);
720   ierr = PetscObjectCompose((PetscObject)A,"DM",(PetscObject)da);CHKERRQ(ierr);
721   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
722   if (size > 1) {
723     /* change viewer to display matrix in natural ordering */
724     ierr = MatShellSetOperation(A, MATOP_VIEW, (void (*)(void)) MatView_MPI_DA);CHKERRQ(ierr);
725     ierr = MatShellSetOperation(A, MATOP_LOAD, (void (*)(void)) MatLoad_MPI_DA);CHKERRQ(ierr);
726   }
727   *J = A;
728   PetscFunctionReturn(0);
729 }
730 
731 /* ---------------------------------------------------------------------------------*/
732 #undef __FUNCT__
733 #define __FUNCT__ "DMCreateMatrix_DA_2d_MPIAIJ"
734 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J)
735 {
736   PetscErrorCode         ierr;
737   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = PETSC_NULL,k,nc,*rows = PETSC_NULL,col,cnt,l,p;
738   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
739   MPI_Comm               comm;
740   PetscScalar            *values;
741   DMDABoundaryType       bx,by;
742   ISLocalToGlobalMapping ltog,ltogb;
743   DMDAStencilType        st;
744 
745   PetscFunctionBegin;
746   /*
747          nc - number of components per grid point
748          col - number of colors needed in one direction for single component problem
749 
750   */
751   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
752   col = 2*s + 1;
753   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
754   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
755   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
756 
757   ierr = PetscMalloc2(nc,PetscInt,&rows,col*col*nc*nc,PetscInt,&cols);CHKERRQ(ierr);
758   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
759   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
760 
761   /* determine the matrix preallocation information */
762   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
763   for (i=xs; i<xs+nx; i++) {
764 
765     pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
766     pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
767 
768     for (j=ys; j<ys+ny; j++) {
769       slot = i - gxs + gnx*(j - gys);
770 
771       lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
772       lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
773 
774       cnt  = 0;
775       for (k=0; k<nc; k++) {
776 	for (l=lstart; l<lend+1; l++) {
777 	  for (p=pstart; p<pend+1; p++) {
778 	    if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
779 	      cols[cnt++]  = k + nc*(slot + gnx*l + p);
780 	    }
781 	  }
782 	}
783 	rows[k] = k + nc*(slot);
784       }
785       ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
786     }
787   }
788   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
789   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
790   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
791   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
792 
793   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
794   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
795 
796   /*
797     For each node in the grid: we get the neighbors in the local (on processor ordering
798     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
799     PETSc ordering.
800   */
801   if (!da->prealloc_only) {
802     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
803     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
804     for (i=xs; i<xs+nx; i++) {
805 
806       pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
807       pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
808 
809       for (j=ys; j<ys+ny; j++) {
810 	slot = i - gxs + gnx*(j - gys);
811 
812 	lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
813 	lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
814 
815 	cnt  = 0;
816 	for (k=0; k<nc; k++) {
817 	  for (l=lstart; l<lend+1; l++) {
818 	    for (p=pstart; p<pend+1; p++) {
819 	      if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
820 		cols[cnt++]  = k + nc*(slot + gnx*l + p);
821 	      }
822 	    }
823 	  }
824 	  rows[k]      = k + nc*(slot);
825 	}
826 	ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
827       }
828     }
829     ierr = PetscFree(values);CHKERRQ(ierr);
830     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
831     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
832   }
833   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
834   PetscFunctionReturn(0);
835 }
836 
837 #undef __FUNCT__
838 #define __FUNCT__ "DMCreateMatrix_DA_2d_MPIAIJ_Fill"
839 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)
840 {
841   PetscErrorCode         ierr;
842   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
843   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,l,p;
844   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
845   DM_DA                  *dd = (DM_DA*)da->data;
846   PetscInt               ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
847   MPI_Comm               comm;
848   PetscScalar            *values;
849   DMDABoundaryType       bx,by;
850   ISLocalToGlobalMapping ltog,ltogb;
851   DMDAStencilType        st;
852 
853   PetscFunctionBegin;
854   /*
855          nc - number of components per grid point
856          col - number of colors needed in one direction for single component problem
857 
858   */
859   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
860   col = 2*s + 1;
861   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
862   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
863   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
864 
865   ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
866   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
867   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
868 
869   /* determine the matrix preallocation information */
870   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
871   for (i=xs; i<xs+nx; i++) {
872 
873     pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
874     pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
875 
876     for (j=ys; j<ys+ny; j++) {
877       slot = i - gxs + gnx*(j - gys);
878 
879       lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
880       lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
881 
882       for (k=0; k<nc; k++) {
883         cnt  = 0;
884 	for (l=lstart; l<lend+1; l++) {
885 	  for (p=pstart; p<pend+1; p++) {
886             if (l || p) {
887 	      if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
888                 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++)
889 		  cols[cnt++]  = ofill[ifill_col] + nc*(slot + gnx*l + p);
890 	      }
891             } else {
892 	      if (dfill) {
893 		for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++)
894 		  cols[cnt++]  = dfill[ifill_col] + nc*(slot + gnx*l + p);
895 	      } else {
896 		for (ifill_col=0; ifill_col<nc; ifill_col++)
897 		  cols[cnt++]  = ifill_col + nc*(slot + gnx*l + p);
898 	      }
899             }
900 	  }
901 	}
902 	row = k + nc*(slot);
903         ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
904       }
905     }
906   }
907   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
908   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
909   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
910   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
911   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
912 
913   /*
914     For each node in the grid: we get the neighbors in the local (on processor ordering
915     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
916     PETSc ordering.
917   */
918   if (!da->prealloc_only) {
919     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
920     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
921     for (i=xs; i<xs+nx; i++) {
922 
923       pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
924       pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
925 
926       for (j=ys; j<ys+ny; j++) {
927 	slot = i - gxs + gnx*(j - gys);
928 
929 	lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
930 	lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
931 
932 	for (k=0; k<nc; k++) {
933 	  cnt  = 0;
934 	  for (l=lstart; l<lend+1; l++) {
935 	    for (p=pstart; p<pend+1; p++) {
936 	      if (l || p) {
937 		if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
938 		  for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++)
939 		    cols[cnt++]  = ofill[ifill_col] + nc*(slot + gnx*l + p);
940 		}
941 	      } else {
942 		if (dfill) {
943 		  for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++)
944 		    cols[cnt++]  = dfill[ifill_col] + nc*(slot + gnx*l + p);
945 		} else {
946 		  for (ifill_col=0; ifill_col<nc; ifill_col++)
947 		    cols[cnt++]  = ifill_col + nc*(slot + gnx*l + p);
948 		}
949 	      }
950 	    }
951 	  }
952 	  row  = k + nc*(slot);
953 	  ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
954 	}
955       }
956     }
957     ierr = PetscFree(values);CHKERRQ(ierr);
958     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
959     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
960   }
961   ierr = PetscFree(cols);CHKERRQ(ierr);
962   PetscFunctionReturn(0);
963 }
964 
965 /* ---------------------------------------------------------------------------------*/
966 
967 #undef __FUNCT__
968 #define __FUNCT__ "DMCreateMatrix_DA_3d_MPIAIJ"
969 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J)
970 {
971   PetscErrorCode         ierr;
972   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
973   PetscInt               m,n,dim,s,*cols = PETSC_NULL,k,nc,*rows = PETSC_NULL,col,cnt,l,p,*dnz = PETSC_NULL,*onz = PETSC_NULL;
974   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
975   MPI_Comm               comm;
976   PetscScalar            *values;
977   DMDABoundaryType       bx,by,bz;
978   ISLocalToGlobalMapping ltog,ltogb;
979   DMDAStencilType        st;
980 
981   PetscFunctionBegin;
982   /*
983          nc - number of components per grid point
984          col - number of colors needed in one direction for single component problem
985 
986   */
987   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
988   col    = 2*s + 1;
989 
990   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
991   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
992   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
993 
994   ierr = PetscMalloc2(nc,PetscInt,&rows,col*col*col*nc*nc,PetscInt,&cols);CHKERRQ(ierr);
995   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
996   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
997 
998   /* determine the matrix preallocation information */
999   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1000   for (i=xs; i<xs+nx; i++) {
1001     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1002     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1003     for (j=ys; j<ys+ny; j++) {
1004       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1005       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1006       for (k=zs; k<zs+nz; k++) {
1007 	kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1008 	kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1009 
1010 	slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1011 
1012 	cnt  = 0;
1013 	for (l=0; l<nc; l++) {
1014 	  for (ii=istart; ii<iend+1; ii++) {
1015 	    for (jj=jstart; jj<jend+1; jj++) {
1016 	      for (kk=kstart; kk<kend+1; kk++) {
1017 		if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1018 		  cols[cnt++]  = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1019 		}
1020 	      }
1021 	    }
1022 	  }
1023 	  rows[l] = l + nc*(slot);
1024 	}
1025 	ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1026       }
1027     }
1028   }
1029   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1030   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1031   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1032   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1033   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1034   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1035 
1036   /*
1037     For each node in the grid: we get the neighbors in the local (on processor ordering
1038     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1039     PETSc ordering.
1040   */
1041   if (!da->prealloc_only) {
1042     ierr = PetscMalloc(col*col*col*nc*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1043     ierr = PetscMemzero(values,col*col*col*nc*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1044     for (i=xs; i<xs+nx; i++) {
1045       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1046       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1047       for (j=ys; j<ys+ny; j++) {
1048 	jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1049 	jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1050 	for (k=zs; k<zs+nz; k++) {
1051 	  kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1052 	  kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1053 
1054 	  slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1055 
1056 	  cnt  = 0;
1057 	  for (l=0; l<nc; l++) {
1058 	    for (ii=istart; ii<iend+1; ii++) {
1059 	      for (jj=jstart; jj<jend+1; jj++) {
1060 		for (kk=kstart; kk<kend+1; kk++) {
1061 		  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1062 		    cols[cnt++]  = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1063 		  }
1064 		}
1065 	      }
1066 	    }
1067 	    rows[l]      = l + nc*(slot);
1068 	  }
1069 	  ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1070 	}
1071       }
1072     }
1073     ierr = PetscFree(values);CHKERRQ(ierr);
1074     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1075     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1076   }
1077   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1078   PetscFunctionReturn(0);
1079 }
1080 
1081 /* ---------------------------------------------------------------------------------*/
1082 
1083 #undef __FUNCT__
1084 #define __FUNCT__ "DMCreateMatrix_DA_1d_MPIAIJ"
1085 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J)
1086 {
1087   PetscErrorCode         ierr;
1088   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1089   PetscInt               m,dim,s,*cols = PETSC_NULL,nc,*rows = PETSC_NULL,col,cnt,l;
1090   PetscInt               istart,iend;
1091   PetscScalar            *values;
1092   DMDABoundaryType       bx;
1093   ISLocalToGlobalMapping ltog,ltogb;
1094 
1095   PetscFunctionBegin;
1096   /*
1097          nc - number of components per grid point
1098          col - number of colors needed in one direction for single component problem
1099 
1100   */
1101   ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
1102   col    = 2*s + 1;
1103 
1104   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
1105   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
1106 
1107   ierr = MatSeqAIJSetPreallocation(J,col*nc,0);CHKERRQ(ierr);
1108   ierr = MatMPIAIJSetPreallocation(J,col*nc,0,col*nc,0);CHKERRQ(ierr);
1109   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1110   ierr = PetscMalloc2(nc,PetscInt,&rows,col*nc*nc,PetscInt,&cols);CHKERRQ(ierr);
1111 
1112   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1113   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1114   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1115   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1116 
1117   /*
1118     For each node in the grid: we get the neighbors in the local (on processor ordering
1119     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1120     PETSc ordering.
1121   */
1122   if (!da->prealloc_only) {
1123     ierr = PetscMalloc(col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1124     ierr = PetscMemzero(values,col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1125     for (i=xs; i<xs+nx; i++) {
1126       istart = PetscMax(-s,gxs - i);
1127       iend   = PetscMin(s,gxs + gnx - i - 1);
1128       slot   = i - gxs;
1129 
1130       cnt  = 0;
1131       for (l=0; l<nc; l++) {
1132 	for (i1=istart; i1<iend+1; i1++) {
1133 	  cols[cnt++] = l + nc*(slot + i1);
1134 	}
1135 	rows[l]      = l + nc*(slot);
1136       }
1137       ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1138     }
1139     ierr = PetscFree(values);CHKERRQ(ierr);
1140     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1141     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1142   }
1143   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1144   PetscFunctionReturn(0);
1145 }
1146 
1147 #undef __FUNCT__
1148 #define __FUNCT__ "DMCreateMatrix_DA_2d_MPIBAIJ"
1149 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
1150 {
1151   PetscErrorCode         ierr;
1152   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1153   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1154   PetscInt               istart,iend,jstart,jend,ii,jj;
1155   MPI_Comm               comm;
1156   PetscScalar            *values;
1157   DMDABoundaryType       bx,by;
1158   DMDAStencilType        st;
1159   ISLocalToGlobalMapping ltog,ltogb;
1160 
1161   PetscFunctionBegin;
1162   /*
1163      nc - number of components per grid point
1164      col - number of colors needed in one direction for single component problem
1165   */
1166   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1167   col = 2*s + 1;
1168 
1169   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1170   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1171   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1172 
1173   ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1174 
1175   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1176   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1177 
1178   /* determine the matrix preallocation information */
1179   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1180   for (i=xs; i<xs+nx; i++) {
1181     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1182     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1183     for (j=ys; j<ys+ny; j++) {
1184       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1185       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1186       slot = i - gxs + gnx*(j - gys);
1187 
1188       /* Find block columns in block row */
1189       cnt  = 0;
1190       for (ii=istart; ii<iend+1; ii++) {
1191         for (jj=jstart; jj<jend+1; jj++) {
1192           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1193             cols[cnt++]  = slot + ii + gnx*jj;
1194           }
1195         }
1196       }
1197       ierr = MatPreallocateSetLocal(ltogb,1,&slot,ltogb,cnt,cols,dnz,onz);CHKERRQ(ierr);
1198     }
1199   }
1200   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1201   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1202   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1203 
1204   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1205   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1206 
1207   /*
1208     For each node in the grid: we get the neighbors in the local (on processor ordering
1209     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1210     PETSc ordering.
1211   */
1212   if (!da->prealloc_only) {
1213     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1214     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1215     for (i=xs; i<xs+nx; i++) {
1216       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1217       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1218       for (j=ys; j<ys+ny; j++) {
1219 	jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1220 	jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1221 	slot = i - gxs + gnx*(j - gys);
1222 	cnt  = 0;
1223         for (ii=istart; ii<iend+1; ii++) {
1224           for (jj=jstart; jj<jend+1; jj++) {
1225             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1226               cols[cnt++]  = slot + ii + gnx*jj;
1227             }
1228           }
1229         }
1230 	ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1231       }
1232     }
1233     ierr = PetscFree(values);CHKERRQ(ierr);
1234     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1235     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1236   }
1237   ierr = PetscFree(cols);CHKERRQ(ierr);
1238   PetscFunctionReturn(0);
1239 }
1240 
1241 #undef __FUNCT__
1242 #define __FUNCT__ "DMCreateMatrix_DA_3d_MPIBAIJ"
1243 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
1244 {
1245   PetscErrorCode         ierr;
1246   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1247   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1248   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1249   MPI_Comm               comm;
1250   PetscScalar            *values;
1251   DMDABoundaryType       bx,by,bz;
1252   DMDAStencilType        st;
1253   ISLocalToGlobalMapping ltog,ltogb;
1254 
1255   PetscFunctionBegin;
1256   /*
1257          nc - number of components per grid point
1258          col - number of colors needed in one direction for single component problem
1259 
1260   */
1261   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1262   col    = 2*s + 1;
1263 
1264   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1265   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1266   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1267 
1268   ierr  = PetscMalloc(col*col*col*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1269 
1270   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1271   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1272 
1273   /* determine the matrix preallocation information */
1274   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1275   for (i=xs; i<xs+nx; i++) {
1276     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1277     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1278     for (j=ys; j<ys+ny; j++) {
1279       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1280       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1281       for (k=zs; k<zs+nz; k++) {
1282 	kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1283 	kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1284 
1285 	slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1286 
1287 	/* Find block columns in block row */
1288 	cnt  = 0;
1289         for (ii=istart; ii<iend+1; ii++) {
1290           for (jj=jstart; jj<jend+1; jj++) {
1291             for (kk=kstart; kk<kend+1; kk++) {
1292               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1293 		cols[cnt++]  = slot + ii + gnx*jj + gnx*gny*kk;
1294 	      }
1295 	    }
1296 	  }
1297 	}
1298 	ierr = MatPreallocateSetLocal(ltogb,1,&slot,ltogb,cnt,cols,dnz,onz);CHKERRQ(ierr);
1299       }
1300     }
1301   }
1302   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1303   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1304   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1305 
1306   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1307   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1308 
1309   /*
1310     For each node in the grid: we get the neighbors in the local (on processor ordering
1311     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1312     PETSc ordering.
1313   */
1314   if (!da->prealloc_only) {
1315     ierr  = PetscMalloc(col*col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1316     ierr  = PetscMemzero(values,col*col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1317     for (i=xs; i<xs+nx; i++) {
1318       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1319       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1320       for (j=ys; j<ys+ny; j++) {
1321 	jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1322 	jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1323 	for (k=zs; k<zs+nz; k++) {
1324 	  kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1325 	  kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1326 
1327 	  slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1328 
1329 	  cnt  = 0;
1330           for (ii=istart; ii<iend+1; ii++) {
1331             for (jj=jstart; jj<jend+1; jj++) {
1332               for (kk=kstart; kk<kend+1; kk++) {
1333                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1334                   cols[cnt++]  = slot + ii + gnx*jj + gnx*gny*kk;
1335                 }
1336               }
1337             }
1338           }
1339 	  ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1340 	}
1341       }
1342     }
1343     ierr = PetscFree(values);CHKERRQ(ierr);
1344     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1345     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1346   }
1347   ierr = PetscFree(cols);CHKERRQ(ierr);
1348   PetscFunctionReturn(0);
1349 }
1350 
1351 #undef __FUNCT__
1352 #define __FUNCT__ "L2GFilterUpperTriangular"
1353 /*
1354   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
1355   identify in the local ordering with periodic domain.
1356 */
1357 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[])
1358 {
1359   PetscErrorCode ierr;
1360   PetscInt       i,n;
1361 
1362   PetscFunctionBegin;
1363   ierr = ISLocalToGlobalMappingApply(ltog,1,row,row);CHKERRQ(ierr);
1364   ierr = ISLocalToGlobalMappingApply(ltog,*cnt,col,col);CHKERRQ(ierr);
1365   for (i=0,n=0; i<*cnt; i++) {
1366     if (col[i] >= *row) col[n++] = col[i];
1367   }
1368   *cnt = n;
1369   PetscFunctionReturn(0);
1370 }
1371 
1372 #undef __FUNCT__
1373 #define __FUNCT__ "DMCreateMatrix_DA_2d_MPISBAIJ"
1374 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J)
1375 {
1376   PetscErrorCode         ierr;
1377   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1378   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1379   PetscInt               istart,iend,jstart,jend,ii,jj;
1380   MPI_Comm               comm;
1381   PetscScalar            *values;
1382   DMDABoundaryType       bx,by;
1383   DMDAStencilType        st;
1384   ISLocalToGlobalMapping ltog,ltogb;
1385 
1386   PetscFunctionBegin;
1387   /*
1388      nc - number of components per grid point
1389      col - number of colors needed in one direction for single component problem
1390   */
1391   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1392   col = 2*s + 1;
1393 
1394   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1395   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1396   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1397 
1398   ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1399 
1400   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1401   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1402 
1403   /* determine the matrix preallocation information */
1404   ierr = MatPreallocateSymmetricInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1405   for (i=xs; i<xs+nx; i++) {
1406     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1407     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1408     for (j=ys; j<ys+ny; j++) {
1409       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1410       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1411       slot = i - gxs + gnx*(j - gys);
1412 
1413       /* Find block columns in block row */
1414       cnt  = 0;
1415       for (ii=istart; ii<iend+1; ii++) {
1416         for (jj=jstart; jj<jend+1; jj++) {
1417           if (st == DMDA_STENCIL_BOX || !ii || !jj) {
1418             cols[cnt++]  = slot + ii + gnx*jj;
1419           }
1420         }
1421       }
1422       ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1423       ierr = MatPreallocateSymmetricSet(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
1424     }
1425   }
1426   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1427   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1428   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1429 
1430   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1431   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1432 
1433   /*
1434     For each node in the grid: we get the neighbors in the local (on processor ordering
1435     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1436     PETSc ordering.
1437   */
1438   if (!da->prealloc_only) {
1439     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1440     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1441     for (i=xs; i<xs+nx; i++) {
1442       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1443       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1444       for (j=ys; j<ys+ny; j++) {
1445         jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1446         jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1447         slot = i - gxs + gnx*(j - gys);
1448 
1449         /* Find block columns in block row */
1450         cnt  = 0;
1451         for (ii=istart; ii<iend+1; ii++) {
1452           for (jj=jstart; jj<jend+1; jj++) {
1453             if (st == DMDA_STENCIL_BOX || !ii || !jj) {
1454               cols[cnt++]  = slot + ii + gnx*jj;
1455             }
1456           }
1457         }
1458         ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1459 	ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1460       }
1461     }
1462     ierr = PetscFree(values);CHKERRQ(ierr);
1463     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1464     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1465   }
1466   ierr = PetscFree(cols);CHKERRQ(ierr);
1467   PetscFunctionReturn(0);
1468 }
1469 
1470 #undef __FUNCT__
1471 #define __FUNCT__ "DMCreateMatrix_DA_3d_MPISBAIJ"
1472 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J)
1473 {
1474   PetscErrorCode         ierr;
1475   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1476   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1477   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1478   MPI_Comm               comm;
1479   PetscScalar            *values;
1480   DMDABoundaryType       bx,by,bz;
1481   DMDAStencilType        st;
1482   ISLocalToGlobalMapping ltog,ltogb;
1483 
1484   PetscFunctionBegin;
1485   /*
1486      nc - number of components per grid point
1487      col - number of colors needed in one direction for single component problem
1488   */
1489   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1490   col = 2*s + 1;
1491 
1492   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1493   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1494   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1495 
1496   /* create the matrix */
1497   ierr = PetscMalloc(col*col*col*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1498 
1499   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1500   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1501 
1502   /* determine the matrix preallocation information */
1503   ierr = MatPreallocateSymmetricInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1504   for (i=xs; i<xs+nx; i++) {
1505     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1506     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1507     for (j=ys; j<ys+ny; j++) {
1508       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1509       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1510       for (k=zs; k<zs+nz; k++) {
1511         kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1512 	kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1513 
1514 	slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1515 
1516 	/* Find block columns in block row */
1517 	cnt  = 0;
1518         for (ii=istart; ii<iend+1; ii++) {
1519           for (jj=jstart; jj<jend+1; jj++) {
1520             for (kk=kstart; kk<kend+1; kk++) {
1521               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
1522                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1523               }
1524             }
1525           }
1526         }
1527         ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1528         ierr = MatPreallocateSymmetricSet(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
1529       }
1530     }
1531   }
1532   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1533   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1534   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1535 
1536   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1537   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1538 
1539   /*
1540     For each node in the grid: we get the neighbors in the local (on processor ordering
1541     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1542     PETSc ordering.
1543   */
1544   if (!da->prealloc_only) {
1545     ierr = PetscMalloc(col*col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1546     ierr = PetscMemzero(values,col*col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1547     for (i=xs; i<xs+nx; i++) {
1548       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1549       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1550       for (j=ys; j<ys+ny; j++) {
1551         jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1552 	jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1553 	for (k=zs; k<zs+nz; k++) {
1554           kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1555 	  kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1556 
1557 	  slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1558 
1559 	  cnt  = 0;
1560           for (ii=istart; ii<iend+1; ii++) {
1561             for (jj=jstart; jj<jend+1; jj++) {
1562               for (kk=kstart; kk<kend+1; kk++) {
1563                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
1564 		  cols[cnt++]  = slot + ii + gnx*jj + gnx*gny*kk;
1565 		}
1566 	      }
1567 	    }
1568 	  }
1569           ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1570           ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1571 	}
1572       }
1573     }
1574     ierr = PetscFree(values);CHKERRQ(ierr);
1575     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1576     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1577   }
1578   ierr = PetscFree(cols);CHKERRQ(ierr);
1579   PetscFunctionReturn(0);
1580 }
1581 
1582 /* ---------------------------------------------------------------------------------*/
1583 
1584 #undef __FUNCT__
1585 #define __FUNCT__ "DMCreateMatrix_DA_3d_MPIAIJ_Fill"
1586 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J)
1587 {
1588   PetscErrorCode         ierr;
1589   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1590   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,l,p,*dnz,*onz;
1591   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1592   DM_DA                  *dd = (DM_DA*)da->data;
1593   PetscInt               ifill_col,*dfill = dd->dfill,*ofill = dd->ofill;
1594   MPI_Comm               comm;
1595   PetscScalar            *values;
1596   DMDABoundaryType       bx,by,bz;
1597   ISLocalToGlobalMapping ltog,ltogb;
1598   DMDAStencilType        st;
1599 
1600   PetscFunctionBegin;
1601   /*
1602          nc - number of components per grid point
1603          col - number of colors needed in one direction for single component problem
1604 
1605   */
1606   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1607   col    = 2*s + 1;
1608   if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)){
1609     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
1610                  by 2*stencil_width + 1\n");
1611   }
1612   if (by == DMDA_BOUNDARY_PERIODIC && (n % col)){
1613     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
1614                  by 2*stencil_width + 1\n");
1615   }
1616   if (bz == DMDA_BOUNDARY_PERIODIC && (p % col)){
1617     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
1618                  by 2*stencil_width + 1\n");
1619   }
1620 
1621   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1622   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1623   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1624 
1625   ierr = PetscMalloc(col*col*col*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1626   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1627   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1628 
1629   /* determine the matrix preallocation information */
1630   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1631 
1632 
1633   for (i=xs; i<xs+nx; i++) {
1634     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1635     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1636     for (j=ys; j<ys+ny; j++) {
1637       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1638       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1639       for (k=zs; k<zs+nz; k++) {
1640         kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1641         kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1642 
1643         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1644 
1645 	for (l=0; l<nc; l++) {
1646 	  cnt  = 0;
1647 	  for (ii=istart; ii<iend+1; ii++) {
1648 	    for (jj=jstart; jj<jend+1; jj++) {
1649 	      for (kk=kstart; kk<kend+1; kk++) {
1650 		if (ii || jj || kk) {
1651 		  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1652 		    for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++)
1653 		      cols[cnt++]  = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1654 		  }
1655 		} else {
1656 		  if (dfill) {
1657 		    for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++)
1658 		      cols[cnt++]  = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1659 		  } else {
1660 		    for (ifill_col=0; ifill_col<nc; ifill_col++)
1661 		      cols[cnt++]  = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1662 		  }
1663 		}
1664 	      }
1665 	    }
1666 	  }
1667 	  row  = l + nc*(slot);
1668 	  ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1669 	}
1670       }
1671     }
1672   }
1673   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1674   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1675   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1676   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1677   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1678 
1679   /*
1680     For each node in the grid: we get the neighbors in the local (on processor ordering
1681     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1682     PETSc ordering.
1683   */
1684   if (!da->prealloc_only) {
1685     ierr = PetscMalloc(col*col*col*nc*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1686     ierr = PetscMemzero(values,col*col*col*nc*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1687     for (i=xs; i<xs+nx; i++) {
1688       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1689       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1690       for (j=ys; j<ys+ny; j++) {
1691 	jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1692 	jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1693 	for (k=zs; k<zs+nz; k++) {
1694 	  kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1695 	  kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1696 
1697 	  slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1698 
1699 	  for (l=0; l<nc; l++) {
1700 	    cnt  = 0;
1701 	    for (ii=istart; ii<iend+1; ii++) {
1702 	      for (jj=jstart; jj<jend+1; jj++) {
1703 		for (kk=kstart; kk<kend+1; kk++) {
1704 		  if (ii || jj || kk) {
1705 		    if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1706 		      for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++)
1707 			cols[cnt++]  = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1708 		    }
1709 		  } else {
1710 		    if (dfill) {
1711 		      for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++)
1712 			cols[cnt++]  = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1713 		    } else {
1714 		      for (ifill_col=0; ifill_col<nc; ifill_col++)
1715 			cols[cnt++]  = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1716 		    }
1717 		  }
1718 		}
1719 	      }
1720 	    }
1721 	    row  = l + nc*(slot);
1722 	    ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1723 	  }
1724 	}
1725       }
1726     }
1727     ierr = PetscFree(values);CHKERRQ(ierr);
1728     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1729     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1730   }
1731   ierr = PetscFree(cols);CHKERRQ(ierr);
1732   PetscFunctionReturn(0);
1733 }
1734