xref: /petsc/src/dm/impls/da/fdda.c (revision 1db2f0e57964f94f63a3f5cf6bb1e5befdb56f30)
1 
2 #include <private/daimpl.h> /*I      "petscdmda.h"     I*/
3 #include <petscmat.h>         /*I      "petscmat.h"    I*/
4 #include <private/matimpl.h>
5 
6 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring *);
7 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring *);
8 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring *);
9 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring *);
10 
11 /*
12    For ghost i that may be negative or greater than the upper bound this
13   maps it into the 0:m-1 range using periodicity
14 */
15 #define SetInRange(i,m) ((i < 0) ? m+i:((i >= m) ? i-m:i))
16 
17 #undef __FUNCT__
18 #define __FUNCT__ "DMDASetBlockFills_Private"
19 static PetscErrorCode DMDASetBlockFills_Private(PetscInt *dfill,PetscInt w,PetscInt **rfill)
20 {
21   PetscErrorCode ierr;
22   PetscInt       i,j,nz,*fill;
23 
24   PetscFunctionBegin;
25   if (!dfill) PetscFunctionReturn(0);
26 
27   /* count number nonzeros */
28   nz = 0;
29   for (i=0; i<w; i++) {
30     for (j=0; j<w; j++) {
31       if (dfill[w*i+j]) nz++;
32     }
33   }
34   ierr = PetscMalloc((nz + w + 1)*sizeof(PetscInt),&fill);CHKERRQ(ierr);
35   /* construct modified CSR storage of nonzero structure */
36   nz = w + 1;
37   for (i=0; i<w; i++) {
38     fill[i] = nz;
39     for (j=0; j<w; j++) {
40       if (dfill[w*i+j]) {
41 	fill[nz] = j;
42 	nz++;
43       }
44     }
45   }
46   fill[w] = nz;
47 
48   *rfill = fill;
49   PetscFunctionReturn(0);
50 }
51 
52 #undef __FUNCT__
53 #define __FUNCT__ "DMDASetBlockFills"
54 /*@
55     DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
56     of the matrix returned by DMCreateMatrix().
57 
58     Logically Collective on DMDA
59 
60     Input Parameter:
61 +   da - the distributed array
62 .   dfill - the fill pattern in the diagonal block (may be PETSC_NULL, means use dense block)
63 -   ofill - the fill pattern in the off-diagonal blocks
64 
65 
66     Level: developer
67 
68     Notes: This only makes sense when you are doing multicomponent problems but using the
69        MPIAIJ matrix format
70 
71            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
72        representing coupling and 0 entries for missing coupling. For example
73 $             dfill[9] = {1, 0, 0,
74 $                         1, 1, 0,
75 $                         0, 1, 1}
76        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
77        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
78        diagonal block).
79 
80      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
81      can be represented in the dfill, ofill format
82 
83    Contributed by Glenn Hammond
84 
85 .seealso DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()
86 
87 @*/
88 PetscErrorCode  DMDASetBlockFills(DM da,PetscInt *dfill,PetscInt *ofill)
89 {
90   DM_DA          *dd = (DM_DA*)da->data;
91   PetscErrorCode ierr;
92 
93   PetscFunctionBegin;
94   ierr = DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);CHKERRQ(ierr);
95   ierr = DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);CHKERRQ(ierr);
96   PetscFunctionReturn(0);
97 }
98 
99 
100 #undef __FUNCT__
101 #define __FUNCT__ "DMCreateColoring_DA"
102 PetscErrorCode  DMCreateColoring_DA(DM da,ISColoringType ctype,const MatType mtype,ISColoring *coloring)
103 {
104   PetscErrorCode   ierr;
105   PetscInt         dim,m,n,p,nc;
106   DMDABoundaryType bx,by,bz;
107   MPI_Comm         comm;
108   PetscMPIInt      size;
109   PetscBool        isBAIJ;
110   DM_DA            *dd = (DM_DA*)da->data;
111 
112   PetscFunctionBegin;
113   /*
114                                   m
115           ------------------------------------------------------
116          |                                                     |
117          |                                                     |
118          |               ----------------------                |
119          |               |                    |                |
120       n  |           yn  |                    |                |
121          |               |                    |                |
122          |               .---------------------                |
123          |             (xs,ys)     xn                          |
124          |            .                                        |
125          |         (gxs,gys)                                   |
126          |                                                     |
127           -----------------------------------------------------
128   */
129 
130   /*
131          nc - number of components per grid point
132          col - number of colors needed in one direction for single component problem
133 
134   */
135   ierr = DMDAGetInfo(da,&dim,0,0,0,&m,&n,&p,&nc,0,&bx,&by,&bz,0);CHKERRQ(ierr);
136 
137   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
138   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
139   if (ctype == IS_COLORING_GHOSTED){
140     if (size == 1) {
141       ctype = IS_COLORING_GLOBAL;
142     } else if (dim > 1){
143       if ((m==1 && bx == DMDA_BOUNDARY_PERIODIC) || (n==1 && by == DMDA_BOUNDARY_PERIODIC) || (p==1 && bz == DMDA_BOUNDARY_PERIODIC)){
144         SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"IS_COLORING_GHOSTED cannot be used for periodic boundary condition having both ends of the domain  on the same process");
145       }
146     }
147   }
148 
149   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
150      matrices is for the blocks, not the individual matrix elements  */
151   ierr = PetscStrcmp(mtype,MATBAIJ,&isBAIJ);CHKERRQ(ierr);
152   if (!isBAIJ) {ierr = PetscStrcmp(mtype,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);}
153   if (!isBAIJ) {ierr = PetscStrcmp(mtype,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);}
154   if (isBAIJ) {
155     dd->w = 1;
156     dd->xs = dd->xs/nc;
157     dd->xe = dd->xe/nc;
158     dd->Xs = dd->Xs/nc;
159     dd->Xe = dd->Xe/nc;
160   }
161 
162   /*
163      We do not provide a getcoloring function in the DMDA operations because
164    the basic DMDA does not know about matrices. We think of DMDA as being more
165    more low-level then matrices.
166   */
167   if (dim == 1) {
168     ierr = DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
169   } else if (dim == 2) {
170     ierr =  DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
171   } else if (dim == 3) {
172     ierr =  DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
173   } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
174   if (isBAIJ) {
175     dd->w = nc;
176     dd->xs = dd->xs*nc;
177     dd->xe = dd->xe*nc;
178     dd->Xs = dd->Xs*nc;
179     dd->Xe = dd->Xe*nc;
180   }
181   PetscFunctionReturn(0);
182 }
183 
184 /* ---------------------------------------------------------------------------------*/
185 
186 #undef __FUNCT__
187 #define __FUNCT__ "DMCreateColoring_DA_2d_MPIAIJ"
188 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
189 {
190   PetscErrorCode         ierr;
191   PetscInt               xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
192   PetscInt               ncolors;
193   MPI_Comm               comm;
194   DMDABoundaryType       bx,by;
195   DMDAStencilType        st;
196   ISColoringValue        *colors;
197   DM_DA                  *dd = (DM_DA*)da->data;
198 
199   PetscFunctionBegin;
200   /*
201          nc - number of components per grid point
202          col - number of colors needed in one direction for single component problem
203 
204   */
205   ierr = DMDAGetInfo(da,&dim,&m,&n,0,&M,&N,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
206   col    = 2*s + 1;
207   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
208   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
209   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
210 
211   /* special case as taught to us by Paul Hovland */
212   if (st == DMDA_STENCIL_STAR && s == 1) {
213     ierr = DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
214   } else {
215 
216     if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)){
217       SETERRQ2(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X (%d) is divisible\n\
218                  by 2*stencil_width + 1 (%d)\n", m, col);
219     }
220     if (by == DMDA_BOUNDARY_PERIODIC && (n % col)){
221       SETERRQ2(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y (%d) is divisible\n\
222                  by 2*stencil_width + 1 (%d)\n", n, col);
223     }
224     if (ctype == IS_COLORING_GLOBAL) {
225       if (!dd->localcoloring) {
226 	ierr = PetscMalloc(nc*nx*ny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
227 	ii = 0;
228 	for (j=ys; j<ys+ny; j++) {
229 	  for (i=xs; i<xs+nx; i++) {
230 	    for (k=0; k<nc; k++) {
231 	      colors[ii++] = k + nc*((i % col) + col*(j % col));
232 	    }
233 	  }
234 	}
235         ncolors = nc + nc*(col-1 + col*(col-1));
236 	ierr = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,&dd->localcoloring);CHKERRQ(ierr);
237       }
238       *coloring = dd->localcoloring;
239     } else if (ctype == IS_COLORING_GHOSTED) {
240       if (!dd->ghostedcoloring) {
241 	ierr = PetscMalloc(nc*gnx*gny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
242 	ii = 0;
243 	for (j=gys; j<gys+gny; j++) {
244 	  for (i=gxs; i<gxs+gnx; i++) {
245 	    for (k=0; k<nc; k++) {
246 	      /* the complicated stuff is to handle periodic boundaries */
247 	      colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
248 	    }
249 	  }
250 	}
251         ncolors = nc + nc*(col - 1 + col*(col-1));
252 	ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
253         /* PetscIntView(ncolors,(PetscInt *)colors,0); */
254 
255 	ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
256       }
257       *coloring = dd->ghostedcoloring;
258     } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
259   }
260   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
261   PetscFunctionReturn(0);
262 }
263 
264 /* ---------------------------------------------------------------------------------*/
265 
266 #undef __FUNCT__
267 #define __FUNCT__ "DMCreateColoring_DA_3d_MPIAIJ"
268 PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
269 {
270   PetscErrorCode    ierr;
271   PetscInt          xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
272   PetscInt          ncolors;
273   MPI_Comm          comm;
274   DMDABoundaryType  bx,by,bz;
275   DMDAStencilType   st;
276   ISColoringValue   *colors;
277   DM_DA             *dd = (DM_DA*)da->data;
278 
279   PetscFunctionBegin;
280   /*
281          nc - number of components per grid point
282          col - number of colors needed in one direction for single component problem
283 
284   */
285   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
286   col    = 2*s + 1;
287   if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)){
288     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
289                  by 2*stencil_width + 1\n");
290   }
291   if (by == DMDA_BOUNDARY_PERIODIC && (n % col)){
292     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
293                  by 2*stencil_width + 1\n");
294   }
295   if (bz == DMDA_BOUNDARY_PERIODIC && (p % col)){
296     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
297                  by 2*stencil_width + 1\n");
298   }
299 
300   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
301   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
302   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
303 
304   /* create the coloring */
305   if (ctype == IS_COLORING_GLOBAL) {
306     if (!dd->localcoloring) {
307       ierr = PetscMalloc(nc*nx*ny*nz*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
308       ii = 0;
309       for (k=zs; k<zs+nz; k++) {
310         for (j=ys; j<ys+ny; j++) {
311           for (i=xs; i<xs+nx; i++) {
312             for (l=0; l<nc; l++) {
313               colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
314             }
315           }
316         }
317       }
318       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
319       ierr = ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,&dd->localcoloring);CHKERRQ(ierr);
320     }
321     *coloring = dd->localcoloring;
322   } else if (ctype == IS_COLORING_GHOSTED) {
323     if (!dd->ghostedcoloring) {
324       ierr = PetscMalloc(nc*gnx*gny*gnz*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
325       ii = 0;
326       for (k=gzs; k<gzs+gnz; k++) {
327         for (j=gys; j<gys+gny; j++) {
328           for (i=gxs; i<gxs+gnx; i++) {
329             for (l=0; l<nc; l++) {
330               /* the complicated stuff is to handle periodic boundaries */
331               colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
332             }
333           }
334         }
335       }
336       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
337       ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
338       ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
339     }
340     *coloring = dd->ghostedcoloring;
341   } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
342   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
343   PetscFunctionReturn(0);
344 }
345 
346 /* ---------------------------------------------------------------------------------*/
347 
348 #undef __FUNCT__
349 #define __FUNCT__ "DMCreateColoring_DA_1d_MPIAIJ"
350 PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
351 {
352   PetscErrorCode    ierr;
353   PetscInt          xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
354   PetscInt          ncolors;
355   MPI_Comm          comm;
356   DMDABoundaryType  bx;
357   ISColoringValue   *colors;
358   DM_DA             *dd = (DM_DA*)da->data;
359 
360   PetscFunctionBegin;
361   /*
362          nc - number of components per grid point
363          col - number of colors needed in one direction for single component problem
364 
365   */
366   ierr = DMDAGetInfo(da,&dim,&m,0,0,&M,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
367   col    = 2*s + 1;
368 
369   if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)) {
370     SETERRQ2(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points %d is divisible\n\
371                  by 2*stencil_width + 1 %d\n",(int)m,(int)col);
372   }
373 
374   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
375   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
376   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
377 
378   /* create the coloring */
379   if (ctype == IS_COLORING_GLOBAL) {
380     if (!dd->localcoloring) {
381       ierr = PetscMalloc(nc*nx*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
382       i1 = 0;
383       for (i=xs; i<xs+nx; i++) {
384         for (l=0; l<nc; l++) {
385           colors[i1++] = l + nc*(i % col);
386         }
387       }
388       ncolors = nc + nc*(col-1);
389       ierr = ISColoringCreate(comm,ncolors,nc*nx,colors,&dd->localcoloring);CHKERRQ(ierr);
390     }
391     *coloring = dd->localcoloring;
392   } else if (ctype == IS_COLORING_GHOSTED) {
393     if (!dd->ghostedcoloring) {
394       ierr = PetscMalloc(nc*gnx*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
395       i1 = 0;
396       for (i=gxs; i<gxs+gnx; i++) {
397         for (l=0; l<nc; l++) {
398           /* the complicated stuff is to handle periodic boundaries */
399           colors[i1++] = l + nc*(SetInRange(i,m) % col);
400         }
401       }
402       ncolors = nc + nc*(col-1);
403       ierr = ISColoringCreate(comm,ncolors,nc*gnx,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
404       ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
405     }
406     *coloring = dd->ghostedcoloring;
407   } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
408   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
409   PetscFunctionReturn(0);
410 }
411 
412 #undef __FUNCT__
413 #define __FUNCT__ "DMCreateColoring_DA_2d_5pt_MPIAIJ"
414 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
415 {
416   PetscErrorCode    ierr;
417   PetscInt          xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
418   PetscInt          ncolors;
419   MPI_Comm          comm;
420   DMDABoundaryType  bx,by;
421   ISColoringValue   *colors;
422   DM_DA             *dd = (DM_DA*)da->data;
423 
424   PetscFunctionBegin;
425   /*
426          nc - number of components per grid point
427          col - number of colors needed in one direction for single component problem
428 
429   */
430   ierr   = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,0);CHKERRQ(ierr);
431   ierr   = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
432   ierr   = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
433   ierr   = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
434 
435   if (bx == DMDA_BOUNDARY_PERIODIC && (m % 5)){
436     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
437                  by 5\n");
438   }
439   if (by == DMDA_BOUNDARY_PERIODIC && (n % 5)){
440     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
441                  by 5\n");
442   }
443 
444   /* create the coloring */
445   if (ctype == IS_COLORING_GLOBAL) {
446     if (!dd->localcoloring) {
447       ierr = PetscMalloc(nc*nx*ny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
448       ii = 0;
449       for (j=ys; j<ys+ny; j++) {
450 	for (i=xs; i<xs+nx; i++) {
451 	  for (k=0; k<nc; k++) {
452 	    colors[ii++] = k + nc*((3*j+i) % 5);
453 	  }
454 	}
455       }
456       ncolors = 5*nc;
457       ierr = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,&dd->localcoloring);CHKERRQ(ierr);
458     }
459     *coloring = dd->localcoloring;
460   } else if (ctype == IS_COLORING_GHOSTED) {
461     if (!dd->ghostedcoloring) {
462       ierr = PetscMalloc(nc*gnx*gny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
463       ii = 0;
464       for (j=gys; j<gys+gny; j++) {
465 	for (i=gxs; i<gxs+gnx; i++) {
466 	  for (k=0; k<nc; k++) {
467 	    colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
468 	  }
469 	}
470       }
471       ncolors = 5*nc;
472       ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
473       ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
474     }
475     *coloring = dd->ghostedcoloring;
476   } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
477   PetscFunctionReturn(0);
478 }
479 
480 /* =========================================================================== */
481 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat);
482 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat);
483 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat);
484 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat);
485 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat);
486 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat);
487 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat);
488 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat);
489 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat);
490 
491 #undef __FUNCT__
492 #define __FUNCT__ "MatSetDM"
493 /*@
494    MatSetDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix
495 
496    Logically Collective on Mat
497 
498    Input Parameters:
499 +  mat - the matrix
500 -  da - the da
501 
502    Level: intermediate
503 
504 @*/
505 PetscErrorCode  MatSetDM(Mat mat,DM da)
506 {
507   PetscErrorCode ierr;
508 
509   PetscFunctionBegin;
510   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
511   PetscValidHeaderSpecific(da,DM_CLASSID,1);
512   ierr = PetscTryMethod(mat,"MatSetDM_C",(Mat,DM),(mat,da));CHKERRQ(ierr);
513   PetscFunctionReturn(0);
514 }
515 
516 EXTERN_C_BEGIN
517 #undef __FUNCT__
518 #define __FUNCT__ "MatView_MPI_DA"
519 PetscErrorCode  MatView_MPI_DA(Mat A,PetscViewer viewer)
520 {
521   DM             da;
522   PetscErrorCode ierr;
523   const char     *prefix;
524   Mat            Anatural;
525   AO             ao;
526   PetscInt       rstart,rend,*petsc,i;
527   IS             is;
528   MPI_Comm       comm;
529 
530   PetscFunctionBegin;
531   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
532   ierr = PetscObjectQuery((PetscObject)A,"DM",(PetscObject*)&da);CHKERRQ(ierr);
533   if (!da) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
534 
535   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
536   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
537   ierr = PetscMalloc((rend-rstart)*sizeof(PetscInt),&petsc);CHKERRQ(ierr);
538   for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
539   ierr = AOApplicationToPetsc(ao,rend-rstart,petsc);CHKERRQ(ierr);
540   ierr = ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
541 
542   /* call viewer on natural ordering */
543   ierr = MatGetSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);CHKERRQ(ierr);
544   ierr = ISDestroy(&is);CHKERRQ(ierr);
545   ierr = PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);CHKERRQ(ierr);
546   ierr = PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);CHKERRQ(ierr);
547   ierr = PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);CHKERRQ(ierr);
548   ierr = MatView(Anatural,viewer);CHKERRQ(ierr);
549   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
550   PetscFunctionReturn(0);
551 }
552 EXTERN_C_END
553 
554 EXTERN_C_BEGIN
555 #undef __FUNCT__
556 #define __FUNCT__ "MatLoad_MPI_DA"
557 PetscErrorCode  MatLoad_MPI_DA(Mat A,PetscViewer viewer)
558 {
559   DM             da;
560   PetscErrorCode ierr;
561   Mat            Anatural,Aapp;
562   AO             ao;
563   PetscInt       rstart,rend,*app,i;
564   IS             is;
565   MPI_Comm       comm;
566 
567   PetscFunctionBegin;
568   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
569   ierr = PetscObjectQuery((PetscObject)A,"DM",(PetscObject*)&da);CHKERRQ(ierr);
570   if (!da) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
571 
572   /* Load the matrix in natural ordering */
573   ierr = MatCreate(((PetscObject)A)->comm,&Anatural);CHKERRQ(ierr);
574   ierr = MatSetType(Anatural,((PetscObject)A)->type_name);CHKERRQ(ierr);
575   ierr = MatSetSizes(Anatural,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
576   ierr = MatLoad(Anatural,viewer);CHKERRQ(ierr);
577 
578   /* Map natural ordering to application ordering and create IS */
579   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
580   ierr = MatGetOwnershipRange(Anatural,&rstart,&rend);CHKERRQ(ierr);
581   ierr = PetscMalloc((rend-rstart)*sizeof(PetscInt),&app);CHKERRQ(ierr);
582   for (i=rstart; i<rend; i++) app[i-rstart] = i;
583   ierr = AOPetscToApplication(ao,rend-rstart,app);CHKERRQ(ierr);
584   ierr = ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
585 
586   /* Do permutation and replace header */
587   ierr = MatGetSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);CHKERRQ(ierr);
588   ierr = MatHeaderReplace(A,Aapp);CHKERRQ(ierr);
589   ierr = ISDestroy(&is);CHKERRQ(ierr);
590   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
591   PetscFunctionReturn(0);
592 }
593 EXTERN_C_END
594 
595 #undef __FUNCT__
596 #define __FUNCT__ "DMCreateMatrix_DA"
597 PetscErrorCode DMCreateMatrix_DA(DM da, const MatType mtype,Mat *J)
598 {
599   PetscErrorCode ierr;
600   PetscInt       dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
601   Mat            A;
602   MPI_Comm       comm;
603   const MatType  Atype;
604   void           (*aij)(void)=PETSC_NULL,(*baij)(void)=PETSC_NULL,(*sbaij)(void)=PETSC_NULL;
605   MatType        ttype[256];
606   PetscBool      flg;
607   PetscMPIInt    size;
608   DM_DA          *dd = (DM_DA*)da->data;
609 
610   PetscFunctionBegin;
611 #ifndef PETSC_USE_DYNAMIC_LIBRARIES
612   ierr = MatInitializePackage(PETSC_NULL);CHKERRQ(ierr);
613 #endif
614   if (!mtype) mtype = MATAIJ;
615   ierr = PetscStrcpy((char*)ttype,mtype);CHKERRQ(ierr);
616   ierr = PetscOptionsBegin(((PetscObject)da)->comm,((PetscObject)da)->prefix,"DMDA options","Mat");CHKERRQ(ierr);
617   ierr = PetscOptionsList("-dm_mat_type","Matrix type","MatSetType",MatList,mtype,(char*)ttype,256,&flg);CHKERRQ(ierr);
618   ierr = PetscOptionsEnd();
619 
620   /*
621                                   m
622           ------------------------------------------------------
623          |                                                     |
624          |                                                     |
625          |               ----------------------                |
626          |               |                    |                |
627       n  |           ny  |                    |                |
628          |               |                    |                |
629          |               .---------------------                |
630          |             (xs,ys)     nx                          |
631          |            .                                        |
632          |         (gxs,gys)                                   |
633          |                                                     |
634           -----------------------------------------------------
635   */
636 
637   /*
638          nc - number of components per grid point
639          col - number of colors needed in one direction for single component problem
640 
641   */
642   ierr = DMDAGetInfo(da,&dim,&M,&N,&P,0,0,0,&dof,0,0,0,0,0);CHKERRQ(ierr);
643   ierr = DMDAGetCorners(da,0,0,0,&nx,&ny,&nz);CHKERRQ(ierr);
644   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
645   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
646   ierr = MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);CHKERRQ(ierr);
647   ierr = MatSetType(A,(const MatType)ttype);CHKERRQ(ierr);
648   ierr = MatSetDM(A,da);CHKERRQ(ierr);
649   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
650   ierr = MatGetType(A,&Atype);CHKERRQ(ierr);
651   /*
652      We do not provide a getmatrix function in the DMDA operations because
653    the basic DMDA does not know about matrices. We think of DMDA as being more
654    more low-level than matrices. This is kind of cheating but, cause sometimes
655    we think of DMDA has higher level than matrices.
656 
657      We could switch based on Atype (or mtype), but we do not since the
658    specialized setting routines depend only the particular preallocation
659    details of the matrix, not the type itself.
660   */
661   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
662   if (!aij) {
663     ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
664   }
665   if (!aij) {
666     ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
667     if (!baij) {
668       ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
669     }
670     if (!baij){
671       ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
672       if (!sbaij) {
673         ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
674       }
675     }
676   }
677   if (aij) {
678     if (dim == 1) {
679       ierr = DMCreateMatrix_DA_1d_MPIAIJ(da,A);CHKERRQ(ierr);
680     } else if (dim == 2) {
681       if (dd->ofill) {
682         ierr = DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
683       } else {
684         ierr = DMCreateMatrix_DA_2d_MPIAIJ(da,A);CHKERRQ(ierr);
685       }
686     } else if (dim == 3) {
687       if (dd->ofill) {
688         ierr = DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
689       } else {
690         ierr = DMCreateMatrix_DA_3d_MPIAIJ(da,A);CHKERRQ(ierr);
691       }
692     }
693   } else if (baij) {
694     if (dim == 2) {
695       ierr = DMCreateMatrix_DA_2d_MPIBAIJ(da,A);CHKERRQ(ierr);
696     } else if (dim == 3) {
697       ierr = DMCreateMatrix_DA_3d_MPIBAIJ(da,A);CHKERRQ(ierr);
698     } else {
699       SETERRQ3(((PetscObject)da)->comm,PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension!\n" \
700 	       "Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
701     }
702   } else if (sbaij) {
703     if (dim == 2) {
704       ierr = DMCreateMatrix_DA_2d_MPISBAIJ(da,A);CHKERRQ(ierr);
705     } else if (dim == 3) {
706       ierr = DMCreateMatrix_DA_3d_MPISBAIJ(da,A);CHKERRQ(ierr);
707     } else {
708       SETERRQ3(((PetscObject)da)->comm,PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension!\n" \
709 	       "Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
710     }
711   } else {
712     ISLocalToGlobalMapping ltog,ltogb;
713     ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
714     ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
715     ierr = MatSetUp(A);CHKERRQ(ierr);
716     ierr = MatSetLocalToGlobalMapping(A,ltog,ltog);CHKERRQ(ierr);
717     ierr = MatSetLocalToGlobalMappingBlock(A,ltogb,ltogb);CHKERRQ(ierr);
718   }
719   ierr = DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);CHKERRQ(ierr);
720   ierr = MatSetStencil(A,dim,dims,starts,dof);CHKERRQ(ierr);
721   ierr = PetscObjectCompose((PetscObject)A,"DM",(PetscObject)da);CHKERRQ(ierr);
722   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
723   if (size > 1) {
724     /* change viewer to display matrix in natural ordering */
725     ierr = MatShellSetOperation(A, MATOP_VIEW, (void (*)(void)) MatView_MPI_DA);CHKERRQ(ierr);
726     ierr = MatShellSetOperation(A, MATOP_LOAD, (void (*)(void)) MatLoad_MPI_DA);CHKERRQ(ierr);
727   }
728   *J = A;
729   PetscFunctionReturn(0);
730 }
731 
732 /* ---------------------------------------------------------------------------------*/
733 #undef __FUNCT__
734 #define __FUNCT__ "DMCreateMatrix_DA_2d_MPIAIJ"
735 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J)
736 {
737   PetscErrorCode         ierr;
738   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = PETSC_NULL,k,nc,*rows = PETSC_NULL,col,cnt,l,p;
739   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
740   MPI_Comm               comm;
741   PetscScalar            *values;
742   DMDABoundaryType       bx,by;
743   ISLocalToGlobalMapping ltog,ltogb;
744   DMDAStencilType        st;
745 
746   PetscFunctionBegin;
747   /*
748          nc - number of components per grid point
749          col - number of colors needed in one direction for single component problem
750 
751   */
752   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
753   col = 2*s + 1;
754   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
755   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
756   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
757 
758   ierr = PetscMalloc2(nc,PetscInt,&rows,col*col*nc*nc,PetscInt,&cols);CHKERRQ(ierr);
759   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
760   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
761 
762   /* determine the matrix preallocation information */
763   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
764   for (i=xs; i<xs+nx; i++) {
765 
766     pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
767     pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
768 
769     for (j=ys; j<ys+ny; j++) {
770       slot = i - gxs + gnx*(j - gys);
771 
772       lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
773       lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
774 
775       cnt  = 0;
776       for (k=0; k<nc; k++) {
777 	for (l=lstart; l<lend+1; l++) {
778 	  for (p=pstart; p<pend+1; p++) {
779 	    if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
780 	      cols[cnt++]  = k + nc*(slot + gnx*l + p);
781 	    }
782 	  }
783 	}
784 	rows[k] = k + nc*(slot);
785       }
786       ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
787     }
788   }
789   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
790   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
791   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
792   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
793 
794   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
795   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
796 
797   /*
798     For each node in the grid: we get the neighbors in the local (on processor ordering
799     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
800     PETSc ordering.
801   */
802   if (!da->prealloc_only) {
803     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
804     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
805     for (i=xs; i<xs+nx; i++) {
806 
807       pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
808       pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
809 
810       for (j=ys; j<ys+ny; j++) {
811 	slot = i - gxs + gnx*(j - gys);
812 
813 	lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
814 	lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
815 
816 	cnt  = 0;
817 	for (k=0; k<nc; k++) {
818 	  for (l=lstart; l<lend+1; l++) {
819 	    for (p=pstart; p<pend+1; p++) {
820 	      if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
821 		cols[cnt++]  = k + nc*(slot + gnx*l + p);
822 	      }
823 	    }
824 	  }
825 	  rows[k]      = k + nc*(slot);
826 	}
827 	ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
828       }
829     }
830     ierr = PetscFree(values);CHKERRQ(ierr);
831     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
832     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
833   }
834   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
835   PetscFunctionReturn(0);
836 }
837 
838 #undef __FUNCT__
839 #define __FUNCT__ "DMCreateMatrix_DA_2d_MPIAIJ_Fill"
840 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)
841 {
842   PetscErrorCode         ierr;
843   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
844   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,l,p;
845   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
846   DM_DA                  *dd = (DM_DA*)da->data;
847   PetscInt               ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
848   MPI_Comm               comm;
849   PetscScalar            *values;
850   DMDABoundaryType       bx,by;
851   ISLocalToGlobalMapping ltog,ltogb;
852   DMDAStencilType        st;
853 
854   PetscFunctionBegin;
855   /*
856          nc - number of components per grid point
857          col - number of colors needed in one direction for single component problem
858 
859   */
860   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
861   col = 2*s + 1;
862   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
863   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
864   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
865 
866   ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
867   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
868   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
869 
870   /* determine the matrix preallocation information */
871   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
872   for (i=xs; i<xs+nx; i++) {
873 
874     pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
875     pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
876 
877     for (j=ys; j<ys+ny; j++) {
878       slot = i - gxs + gnx*(j - gys);
879 
880       lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
881       lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
882 
883       for (k=0; k<nc; k++) {
884         cnt  = 0;
885 	for (l=lstart; l<lend+1; l++) {
886 	  for (p=pstart; p<pend+1; p++) {
887             if (l || p) {
888 	      if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
889                 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++)
890 		  cols[cnt++]  = ofill[ifill_col] + nc*(slot + gnx*l + p);
891 	      }
892             } else {
893 	      if (dfill) {
894 		for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++)
895 		  cols[cnt++]  = dfill[ifill_col] + nc*(slot + gnx*l + p);
896 	      } else {
897 		for (ifill_col=0; ifill_col<nc; ifill_col++)
898 		  cols[cnt++]  = ifill_col + nc*(slot + gnx*l + p);
899 	      }
900             }
901 	  }
902 	}
903 	row = k + nc*(slot);
904         ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
905       }
906     }
907   }
908   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
909   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
910   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
911   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
912   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
913 
914   /*
915     For each node in the grid: we get the neighbors in the local (on processor ordering
916     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
917     PETSc ordering.
918   */
919   if (!da->prealloc_only) {
920     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
921     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
922     for (i=xs; i<xs+nx; i++) {
923 
924       pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
925       pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
926 
927       for (j=ys; j<ys+ny; j++) {
928 	slot = i - gxs + gnx*(j - gys);
929 
930 	lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
931 	lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
932 
933 	for (k=0; k<nc; k++) {
934 	  cnt  = 0;
935 	  for (l=lstart; l<lend+1; l++) {
936 	    for (p=pstart; p<pend+1; p++) {
937 	      if (l || p) {
938 		if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
939 		  for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++)
940 		    cols[cnt++]  = ofill[ifill_col] + nc*(slot + gnx*l + p);
941 		}
942 	      } else {
943 		if (dfill) {
944 		  for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++)
945 		    cols[cnt++]  = dfill[ifill_col] + nc*(slot + gnx*l + p);
946 		} else {
947 		  for (ifill_col=0; ifill_col<nc; ifill_col++)
948 		    cols[cnt++]  = ifill_col + nc*(slot + gnx*l + p);
949 		}
950 	      }
951 	    }
952 	  }
953 	  row  = k + nc*(slot);
954 	  ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
955 	}
956       }
957     }
958     ierr = PetscFree(values);CHKERRQ(ierr);
959     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
960     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
961   }
962   ierr = PetscFree(cols);CHKERRQ(ierr);
963   PetscFunctionReturn(0);
964 }
965 
966 /* ---------------------------------------------------------------------------------*/
967 
968 #undef __FUNCT__
969 #define __FUNCT__ "DMCreateMatrix_DA_3d_MPIAIJ"
970 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J)
971 {
972   PetscErrorCode         ierr;
973   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
974   PetscInt               m,n,dim,s,*cols = PETSC_NULL,k,nc,*rows = PETSC_NULL,col,cnt,l,p,*dnz = PETSC_NULL,*onz = PETSC_NULL;
975   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
976   MPI_Comm               comm;
977   PetscScalar            *values;
978   DMDABoundaryType       bx,by,bz;
979   ISLocalToGlobalMapping ltog,ltogb;
980   DMDAStencilType        st;
981 
982   PetscFunctionBegin;
983   /*
984          nc - number of components per grid point
985          col - number of colors needed in one direction for single component problem
986 
987   */
988   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
989   col    = 2*s + 1;
990 
991   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
992   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
993   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
994 
995   ierr = PetscMalloc2(nc,PetscInt,&rows,col*col*col*nc*nc,PetscInt,&cols);CHKERRQ(ierr);
996   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
997   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
998 
999   /* determine the matrix preallocation information */
1000   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1001   for (i=xs; i<xs+nx; i++) {
1002     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1003     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1004     for (j=ys; j<ys+ny; j++) {
1005       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1006       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1007       for (k=zs; k<zs+nz; k++) {
1008 	kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1009 	kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1010 
1011 	slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1012 
1013 	cnt  = 0;
1014 	for (l=0; l<nc; l++) {
1015 	  for (ii=istart; ii<iend+1; ii++) {
1016 	    for (jj=jstart; jj<jend+1; jj++) {
1017 	      for (kk=kstart; kk<kend+1; kk++) {
1018 		if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1019 		  cols[cnt++]  = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1020 		}
1021 	      }
1022 	    }
1023 	  }
1024 	  rows[l] = l + nc*(slot);
1025 	}
1026 	ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1027       }
1028     }
1029   }
1030   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1031   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1032   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1033   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1034   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1035   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1036 
1037   /*
1038     For each node in the grid: we get the neighbors in the local (on processor ordering
1039     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1040     PETSc ordering.
1041   */
1042   if (!da->prealloc_only) {
1043     ierr = PetscMalloc(col*col*col*nc*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1044     ierr = PetscMemzero(values,col*col*col*nc*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1045     for (i=xs; i<xs+nx; i++) {
1046       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1047       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1048       for (j=ys; j<ys+ny; j++) {
1049 	jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1050 	jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1051 	for (k=zs; k<zs+nz; k++) {
1052 	  kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1053 	  kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1054 
1055 	  slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1056 
1057 	  cnt  = 0;
1058 	  for (l=0; l<nc; l++) {
1059 	    for (ii=istart; ii<iend+1; ii++) {
1060 	      for (jj=jstart; jj<jend+1; jj++) {
1061 		for (kk=kstart; kk<kend+1; kk++) {
1062 		  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1063 		    cols[cnt++]  = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1064 		  }
1065 		}
1066 	      }
1067 	    }
1068 	    rows[l]      = l + nc*(slot);
1069 	  }
1070 	  ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1071 	}
1072       }
1073     }
1074     ierr = PetscFree(values);CHKERRQ(ierr);
1075     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1076     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1077   }
1078   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1079   PetscFunctionReturn(0);
1080 }
1081 
1082 /* ---------------------------------------------------------------------------------*/
1083 
1084 #undef __FUNCT__
1085 #define __FUNCT__ "DMCreateMatrix_DA_1d_MPIAIJ"
1086 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J)
1087 {
1088   PetscErrorCode         ierr;
1089   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1090   PetscInt               m,dim,s,*cols = PETSC_NULL,nc,*rows = PETSC_NULL,col,cnt,l;
1091   PetscInt               istart,iend;
1092   PetscScalar            *values;
1093   DMDABoundaryType       bx;
1094   ISLocalToGlobalMapping ltog,ltogb;
1095 
1096   PetscFunctionBegin;
1097   /*
1098          nc - number of components per grid point
1099          col - number of colors needed in one direction for single component problem
1100 
1101   */
1102   ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
1103   col    = 2*s + 1;
1104 
1105   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
1106   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
1107 
1108   ierr = MatSeqAIJSetPreallocation(J,col*nc,0);CHKERRQ(ierr);
1109   ierr = MatMPIAIJSetPreallocation(J,col*nc,0,col*nc,0);CHKERRQ(ierr);
1110   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1111   ierr = PetscMalloc2(nc,PetscInt,&rows,col*nc*nc,PetscInt,&cols);CHKERRQ(ierr);
1112 
1113   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1114   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1115   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1116   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1117 
1118   /*
1119     For each node in the grid: we get the neighbors in the local (on processor ordering
1120     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1121     PETSc ordering.
1122   */
1123   if (!da->prealloc_only) {
1124     ierr = PetscMalloc(col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1125     ierr = PetscMemzero(values,col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1126     for (i=xs; i<xs+nx; i++) {
1127       istart = PetscMax(-s,gxs - i);
1128       iend   = PetscMin(s,gxs + gnx - i - 1);
1129       slot   = i - gxs;
1130 
1131       cnt  = 0;
1132       for (l=0; l<nc; l++) {
1133 	for (i1=istart; i1<iend+1; i1++) {
1134 	  cols[cnt++] = l + nc*(slot + i1);
1135 	}
1136 	rows[l]      = l + nc*(slot);
1137       }
1138       ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1139     }
1140     ierr = PetscFree(values);CHKERRQ(ierr);
1141     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1142     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1143   }
1144   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1145   PetscFunctionReturn(0);
1146 }
1147 
1148 #undef __FUNCT__
1149 #define __FUNCT__ "DMCreateMatrix_DA_2d_MPIBAIJ"
1150 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
1151 {
1152   PetscErrorCode         ierr;
1153   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1154   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1155   PetscInt               istart,iend,jstart,jend,ii,jj;
1156   MPI_Comm               comm;
1157   PetscScalar            *values;
1158   DMDABoundaryType       bx,by;
1159   DMDAStencilType        st;
1160   ISLocalToGlobalMapping ltog,ltogb;
1161 
1162   PetscFunctionBegin;
1163   /*
1164      nc - number of components per grid point
1165      col - number of colors needed in one direction for single component problem
1166   */
1167   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1168   col = 2*s + 1;
1169 
1170   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1171   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1172   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1173 
1174   ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1175 
1176   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1177   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1178 
1179   /* determine the matrix preallocation information */
1180   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1181   for (i=xs; i<xs+nx; i++) {
1182     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1183     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1184     for (j=ys; j<ys+ny; j++) {
1185       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1186       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1187       slot = i - gxs + gnx*(j - gys);
1188 
1189       /* Find block columns in block row */
1190       cnt  = 0;
1191       for (ii=istart; ii<iend+1; ii++) {
1192         for (jj=jstart; jj<jend+1; jj++) {
1193           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1194             cols[cnt++]  = slot + ii + gnx*jj;
1195           }
1196         }
1197       }
1198       ierr = MatPreallocateSetLocal(ltogb,1,&slot,ltogb,cnt,cols,dnz,onz);CHKERRQ(ierr);
1199     }
1200   }
1201   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1202   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1203   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1204 
1205   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1206   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1207 
1208   /*
1209     For each node in the grid: we get the neighbors in the local (on processor ordering
1210     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1211     PETSc ordering.
1212   */
1213   if (!da->prealloc_only) {
1214     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1215     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1216     for (i=xs; i<xs+nx; i++) {
1217       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1218       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1219       for (j=ys; j<ys+ny; j++) {
1220 	jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1221 	jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1222 	slot = i - gxs + gnx*(j - gys);
1223 	cnt  = 0;
1224         for (ii=istart; ii<iend+1; ii++) {
1225           for (jj=jstart; jj<jend+1; jj++) {
1226             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1227               cols[cnt++]  = slot + ii + gnx*jj;
1228             }
1229           }
1230         }
1231 	ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1232       }
1233     }
1234     ierr = PetscFree(values);CHKERRQ(ierr);
1235     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1236     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1237   }
1238   ierr = PetscFree(cols);CHKERRQ(ierr);
1239   PetscFunctionReturn(0);
1240 }
1241 
1242 #undef __FUNCT__
1243 #define __FUNCT__ "DMCreateMatrix_DA_3d_MPIBAIJ"
1244 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
1245 {
1246   PetscErrorCode         ierr;
1247   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1248   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1249   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1250   MPI_Comm               comm;
1251   PetscScalar            *values;
1252   DMDABoundaryType       bx,by,bz;
1253   DMDAStencilType        st;
1254   ISLocalToGlobalMapping ltog,ltogb;
1255 
1256   PetscFunctionBegin;
1257   /*
1258          nc - number of components per grid point
1259          col - number of colors needed in one direction for single component problem
1260 
1261   */
1262   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1263   col    = 2*s + 1;
1264 
1265   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1266   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1267   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1268 
1269   ierr  = PetscMalloc(col*col*col*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1270 
1271   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1272   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1273 
1274   /* determine the matrix preallocation information */
1275   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1276   for (i=xs; i<xs+nx; i++) {
1277     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1278     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1279     for (j=ys; j<ys+ny; j++) {
1280       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1281       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1282       for (k=zs; k<zs+nz; k++) {
1283 	kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1284 	kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1285 
1286 	slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1287 
1288 	/* Find block columns in block row */
1289 	cnt  = 0;
1290         for (ii=istart; ii<iend+1; ii++) {
1291           for (jj=jstart; jj<jend+1; jj++) {
1292             for (kk=kstart; kk<kend+1; kk++) {
1293               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1294 		cols[cnt++]  = slot + ii + gnx*jj + gnx*gny*kk;
1295 	      }
1296 	    }
1297 	  }
1298 	}
1299 	ierr = MatPreallocateSetLocal(ltogb,1,&slot,ltogb,cnt,cols,dnz,onz);CHKERRQ(ierr);
1300       }
1301     }
1302   }
1303   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1304   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1305   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1306 
1307   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1308   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1309 
1310   /*
1311     For each node in the grid: we get the neighbors in the local (on processor ordering
1312     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1313     PETSc ordering.
1314   */
1315   if (!da->prealloc_only) {
1316     ierr  = PetscMalloc(col*col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1317     ierr  = PetscMemzero(values,col*col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1318     for (i=xs; i<xs+nx; i++) {
1319       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1320       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1321       for (j=ys; j<ys+ny; j++) {
1322 	jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1323 	jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1324 	for (k=zs; k<zs+nz; k++) {
1325 	  kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1326 	  kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1327 
1328 	  slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1329 
1330 	  cnt  = 0;
1331           for (ii=istart; ii<iend+1; ii++) {
1332             for (jj=jstart; jj<jend+1; jj++) {
1333               for (kk=kstart; kk<kend+1; kk++) {
1334                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1335                   cols[cnt++]  = slot + ii + gnx*jj + gnx*gny*kk;
1336                 }
1337               }
1338             }
1339           }
1340 	  ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1341 	}
1342       }
1343     }
1344     ierr = PetscFree(values);CHKERRQ(ierr);
1345     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1346     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1347   }
1348   ierr = PetscFree(cols);CHKERRQ(ierr);
1349   PetscFunctionReturn(0);
1350 }
1351 
1352 #undef __FUNCT__
1353 #define __FUNCT__ "L2GFilterUpperTriangular"
1354 /*
1355   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
1356   identify in the local ordering with periodic domain.
1357 */
1358 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[])
1359 {
1360   PetscErrorCode ierr;
1361   PetscInt       i,n;
1362 
1363   PetscFunctionBegin;
1364   ierr = ISLocalToGlobalMappingApply(ltog,1,row,row);CHKERRQ(ierr);
1365   ierr = ISLocalToGlobalMappingApply(ltog,*cnt,col,col);CHKERRQ(ierr);
1366   for (i=0,n=0; i<*cnt; i++) {
1367     if (col[i] >= *row) col[n++] = col[i];
1368   }
1369   *cnt = n;
1370   PetscFunctionReturn(0);
1371 }
1372 
1373 #undef __FUNCT__
1374 #define __FUNCT__ "DMCreateMatrix_DA_2d_MPISBAIJ"
1375 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J)
1376 {
1377   PetscErrorCode         ierr;
1378   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1379   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1380   PetscInt               istart,iend,jstart,jend,ii,jj;
1381   MPI_Comm               comm;
1382   PetscScalar            *values;
1383   DMDABoundaryType       bx,by;
1384   DMDAStencilType        st;
1385   ISLocalToGlobalMapping ltog,ltogb;
1386 
1387   PetscFunctionBegin;
1388   /*
1389      nc - number of components per grid point
1390      col - number of colors needed in one direction for single component problem
1391   */
1392   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1393   col = 2*s + 1;
1394 
1395   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1396   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1397   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1398 
1399   ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1400 
1401   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1402   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1403 
1404   /* determine the matrix preallocation information */
1405   ierr = MatPreallocateSymmetricInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1406   for (i=xs; i<xs+nx; i++) {
1407     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1408     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1409     for (j=ys; j<ys+ny; j++) {
1410       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1411       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1412       slot = i - gxs + gnx*(j - gys);
1413 
1414       /* Find block columns in block row */
1415       cnt  = 0;
1416       for (ii=istart; ii<iend+1; ii++) {
1417         for (jj=jstart; jj<jend+1; jj++) {
1418           if (st == DMDA_STENCIL_BOX || !ii || !jj) {
1419             cols[cnt++]  = slot + ii + gnx*jj;
1420           }
1421         }
1422       }
1423       ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1424       ierr = MatPreallocateSymmetricSet(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
1425     }
1426   }
1427   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1428   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1429   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1430 
1431   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1432   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1433 
1434   /*
1435     For each node in the grid: we get the neighbors in the local (on processor ordering
1436     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1437     PETSc ordering.
1438   */
1439   if (!da->prealloc_only) {
1440     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1441     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1442     for (i=xs; i<xs+nx; i++) {
1443       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1444       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1445       for (j=ys; j<ys+ny; j++) {
1446         jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1447         jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1448         slot = i - gxs + gnx*(j - gys);
1449 
1450         /* Find block columns in block row */
1451         cnt  = 0;
1452         for (ii=istart; ii<iend+1; ii++) {
1453           for (jj=jstart; jj<jend+1; jj++) {
1454             if (st == DMDA_STENCIL_BOX || !ii || !jj) {
1455               cols[cnt++]  = slot + ii + gnx*jj;
1456             }
1457           }
1458         }
1459         ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1460 	ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1461       }
1462     }
1463     ierr = PetscFree(values);CHKERRQ(ierr);
1464     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1465     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1466   }
1467   ierr = PetscFree(cols);CHKERRQ(ierr);
1468   PetscFunctionReturn(0);
1469 }
1470 
1471 #undef __FUNCT__
1472 #define __FUNCT__ "DMCreateMatrix_DA_3d_MPISBAIJ"
1473 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J)
1474 {
1475   PetscErrorCode         ierr;
1476   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1477   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1478   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1479   MPI_Comm               comm;
1480   PetscScalar            *values;
1481   DMDABoundaryType       bx,by,bz;
1482   DMDAStencilType        st;
1483   ISLocalToGlobalMapping ltog,ltogb;
1484 
1485   PetscFunctionBegin;
1486   /*
1487      nc - number of components per grid point
1488      col - number of colors needed in one direction for single component problem
1489   */
1490   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1491   col = 2*s + 1;
1492 
1493   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1494   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1495   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1496 
1497   /* create the matrix */
1498   ierr = PetscMalloc(col*col*col*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1499 
1500   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1501   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1502 
1503   /* determine the matrix preallocation information */
1504   ierr = MatPreallocateSymmetricInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1505   for (i=xs; i<xs+nx; i++) {
1506     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1507     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1508     for (j=ys; j<ys+ny; j++) {
1509       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1510       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1511       for (k=zs; k<zs+nz; k++) {
1512         kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1513 	kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1514 
1515 	slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1516 
1517 	/* Find block columns in block row */
1518 	cnt  = 0;
1519         for (ii=istart; ii<iend+1; ii++) {
1520           for (jj=jstart; jj<jend+1; jj++) {
1521             for (kk=kstart; kk<kend+1; kk++) {
1522               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
1523                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1524               }
1525             }
1526           }
1527         }
1528         ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1529         ierr = MatPreallocateSymmetricSet(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
1530       }
1531     }
1532   }
1533   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1534   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1535   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1536 
1537   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1538   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1539 
1540   /*
1541     For each node in the grid: we get the neighbors in the local (on processor ordering
1542     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1543     PETSc ordering.
1544   */
1545   if (!da->prealloc_only) {
1546     ierr = PetscMalloc(col*col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1547     ierr = PetscMemzero(values,col*col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1548     for (i=xs; i<xs+nx; i++) {
1549       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1550       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1551       for (j=ys; j<ys+ny; j++) {
1552         jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1553 	jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1554 	for (k=zs; k<zs+nz; k++) {
1555           kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1556 	  kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1557 
1558 	  slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1559 
1560 	  cnt  = 0;
1561           for (ii=istart; ii<iend+1; ii++) {
1562             for (jj=jstart; jj<jend+1; jj++) {
1563               for (kk=kstart; kk<kend+1; kk++) {
1564                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
1565 		  cols[cnt++]  = slot + ii + gnx*jj + gnx*gny*kk;
1566 		}
1567 	      }
1568 	    }
1569 	  }
1570           ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1571           ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1572 	}
1573       }
1574     }
1575     ierr = PetscFree(values);CHKERRQ(ierr);
1576     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1577     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1578   }
1579   ierr = PetscFree(cols);CHKERRQ(ierr);
1580   PetscFunctionReturn(0);
1581 }
1582 
1583 /* ---------------------------------------------------------------------------------*/
1584 
1585 #undef __FUNCT__
1586 #define __FUNCT__ "DMCreateMatrix_DA_3d_MPIAIJ_Fill"
1587 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J)
1588 {
1589   PetscErrorCode         ierr;
1590   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1591   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,l,p,*dnz,*onz;
1592   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1593   DM_DA                  *dd = (DM_DA*)da->data;
1594   PetscInt               ifill_col,*dfill = dd->dfill,*ofill = dd->ofill;
1595   MPI_Comm               comm;
1596   PetscScalar            *values;
1597   DMDABoundaryType       bx,by,bz;
1598   ISLocalToGlobalMapping ltog,ltogb;
1599   DMDAStencilType        st;
1600 
1601   PetscFunctionBegin;
1602   /*
1603          nc - number of components per grid point
1604          col - number of colors needed in one direction for single component problem
1605 
1606   */
1607   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1608   col    = 2*s + 1;
1609   if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)){
1610     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
1611                  by 2*stencil_width + 1\n");
1612   }
1613   if (by == DMDA_BOUNDARY_PERIODIC && (n % col)){
1614     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
1615                  by 2*stencil_width + 1\n");
1616   }
1617   if (bz == DMDA_BOUNDARY_PERIODIC && (p % col)){
1618     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
1619                  by 2*stencil_width + 1\n");
1620   }
1621 
1622   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1623   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1624   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1625 
1626   ierr = PetscMalloc(col*col*col*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1627   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1628   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1629 
1630   /* determine the matrix preallocation information */
1631   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1632 
1633 
1634   for (i=xs; i<xs+nx; i++) {
1635     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1636     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1637     for (j=ys; j<ys+ny; j++) {
1638       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1639       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1640       for (k=zs; k<zs+nz; k++) {
1641         kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1642         kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1643 
1644         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1645 
1646 	for (l=0; l<nc; l++) {
1647 	  cnt  = 0;
1648 	  for (ii=istart; ii<iend+1; ii++) {
1649 	    for (jj=jstart; jj<jend+1; jj++) {
1650 	      for (kk=kstart; kk<kend+1; kk++) {
1651 		if (ii || jj || kk) {
1652 		  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1653 		    for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++)
1654 		      cols[cnt++]  = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1655 		  }
1656 		} else {
1657 		  if (dfill) {
1658 		    for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++)
1659 		      cols[cnt++]  = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1660 		  } else {
1661 		    for (ifill_col=0; ifill_col<nc; ifill_col++)
1662 		      cols[cnt++]  = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1663 		  }
1664 		}
1665 	      }
1666 	    }
1667 	  }
1668 	  row  = l + nc*(slot);
1669 	  ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1670 	}
1671       }
1672     }
1673   }
1674   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1675   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1676   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1677   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1678   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1679 
1680   /*
1681     For each node in the grid: we get the neighbors in the local (on processor ordering
1682     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1683     PETSc ordering.
1684   */
1685   if (!da->prealloc_only) {
1686     ierr = PetscMalloc(col*col*col*nc*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1687     ierr = PetscMemzero(values,col*col*col*nc*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1688     for (i=xs; i<xs+nx; i++) {
1689       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1690       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1691       for (j=ys; j<ys+ny; j++) {
1692 	jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1693 	jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1694 	for (k=zs; k<zs+nz; k++) {
1695 	  kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1696 	  kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1697 
1698 	  slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1699 
1700 	  for (l=0; l<nc; l++) {
1701 	    cnt  = 0;
1702 	    for (ii=istart; ii<iend+1; ii++) {
1703 	      for (jj=jstart; jj<jend+1; jj++) {
1704 		for (kk=kstart; kk<kend+1; kk++) {
1705 		  if (ii || jj || kk) {
1706 		    if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1707 		      for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++)
1708 			cols[cnt++]  = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1709 		    }
1710 		  } else {
1711 		    if (dfill) {
1712 		      for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++)
1713 			cols[cnt++]  = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1714 		    } else {
1715 		      for (ifill_col=0; ifill_col<nc; ifill_col++)
1716 			cols[cnt++]  = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1717 		    }
1718 		  }
1719 		}
1720 	      }
1721 	    }
1722 	    row  = l + nc*(slot);
1723 	    ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1724 	  }
1725 	}
1726       }
1727     }
1728     ierr = PetscFree(values);CHKERRQ(ierr);
1729     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1730     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1731   }
1732   ierr = PetscFree(cols);CHKERRQ(ierr);
1733   PetscFunctionReturn(0);
1734 }
1735