xref: /petsc/src/dm/impls/da/fdda.c (revision cdbf8f939cdfb1c797c4b7f2cbbd00be19935363)
1 
2 #include <private/daimpl.h> /*I      "petscdmda.h"     I*/
3 #include <petscmat.h>         /*I      "petscmat.h"    I*/
4 #include <private/matimpl.h>
5 
6 extern PetscErrorCode DMGetColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring *);
7 extern PetscErrorCode DMGetColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring *);
8 extern PetscErrorCode DMGetColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring *);
9 extern PetscErrorCode DMGetColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring *);
10 
11 /*
12    For ghost i that may be negative or greater than the upper bound this
13   maps it into the 0:m-1 range using periodicity
14 */
15 #define SetInRange(i,m) ((i < 0) ? m+i:((i >= m) ? i-m:i))
16 
17 #undef __FUNCT__
18 #define __FUNCT__ "DMDASetBlockFills_Private"
19 static PetscErrorCode DMDASetBlockFills_Private(PetscInt *dfill,PetscInt w,PetscInt **rfill)
20 {
21   PetscErrorCode ierr;
22   PetscInt       i,j,nz,*fill;
23 
24   PetscFunctionBegin;
25   if (!dfill) PetscFunctionReturn(0);
26 
27   /* count number nonzeros */
28   nz = 0;
29   for (i=0; i<w; i++) {
30     for (j=0; j<w; j++) {
31       if (dfill[w*i+j]) nz++;
32     }
33   }
34   ierr = PetscMalloc((nz + w + 1)*sizeof(PetscInt),&fill);CHKERRQ(ierr);
35   /* construct modified CSR storage of nonzero structure */
36   nz = w + 1;
37   for (i=0; i<w; i++) {
38     fill[i] = nz;
39     for (j=0; j<w; j++) {
40       if (dfill[w*i+j]) {
41 	fill[nz] = j;
42 	nz++;
43       }
44     }
45   }
46   fill[w] = nz;
47 
48   *rfill = fill;
49   PetscFunctionReturn(0);
50 }
51 
52 #undef __FUNCT__
53 #define __FUNCT__ "DMDASetBlockFills"
54 /*@
55     DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
56     of the matrix returned by DMGetMatrix().
57 
58     Logically Collective on DMDA
59 
60     Input Parameter:
61 +   da - the distributed array
62 .   dfill - the fill pattern in the diagonal block (may be PETSC_NULL, means use dense block)
63 -   ofill - the fill pattern in the off-diagonal blocks
64 
65 
66     Level: developer
67 
68     Notes: This only makes sense when you are doing multicomponent problems but using the
69        MPIAIJ matrix format
70 
71            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
72        representing coupling and 0 entries for missing coupling. For example
73 $             dfill[9] = {1, 0, 0,
74 $                         1, 1, 0,
75 $                         0, 1, 1}
76        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
77        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
78        diagonal block).
79 
80      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
81      can be represented in the dfill, ofill format
82 
83    Contributed by Glenn Hammond
84 
85 .seealso DMGetMatrix(), DMDASetGetMatrix(), DMDASetMatPreallocateOnly()
86 
87 @*/
88 PetscErrorCode  DMDASetBlockFills(DM da,PetscInt *dfill,PetscInt *ofill)
89 {
90   DM_DA          *dd = (DM_DA*)da->data;
91   PetscErrorCode ierr;
92 
93   PetscFunctionBegin;
94   ierr = DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);CHKERRQ(ierr);
95   ierr = DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);CHKERRQ(ierr);
96   PetscFunctionReturn(0);
97 }
98 
99 
100 #undef __FUNCT__
101 #define __FUNCT__ "DMGetColoring_DA"
102 PetscErrorCode  DMGetColoring_DA(DM da,ISColoringType ctype,const MatType mtype,ISColoring *coloring)
103 {
104   PetscErrorCode   ierr;
105   PetscInt         dim,m,n,p,nc;
106   DMDABoundaryType bx,by,bz;
107   MPI_Comm         comm;
108   PetscMPIInt      size;
109   PetscBool        isBAIJ;
110   DM_DA            *dd = (DM_DA*)da->data;
111 
112   PetscFunctionBegin;
113   /*
114                                   m
115           ------------------------------------------------------
116          |                                                     |
117          |                                                     |
118          |               ----------------------                |
119          |               |                    |                |
120       n  |           yn  |                    |                |
121          |               |                    |                |
122          |               .---------------------                |
123          |             (xs,ys)     xn                          |
124          |            .                                        |
125          |         (gxs,gys)                                   |
126          |                                                     |
127           -----------------------------------------------------
128   */
129 
130   /*
131          nc - number of components per grid point
132          col - number of colors needed in one direction for single component problem
133 
134   */
135   ierr = DMDAGetInfo(da,&dim,0,0,0,&m,&n,&p,&nc,0,&bx,&by,&bz,0);CHKERRQ(ierr);
136 
137   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
138   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
139   if (ctype == IS_COLORING_GHOSTED){
140     if (size == 1) {
141       ctype = IS_COLORING_GLOBAL;
142     } else if (dim > 1){
143       if ((m==1 && bx == DMDA_BOUNDARY_PERIODIC) || (n==1 && by == DMDA_BOUNDARY_PERIODIC) || (p==1 && bz == DMDA_BOUNDARY_PERIODIC)){
144         SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"IS_COLORING_GHOSTED cannot be used for periodic boundary condition having both ends of the domain  on the same process");
145       }
146     }
147   }
148 
149   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
150      matrices is for the blocks, not the individual matrix elements  */
151   ierr = PetscStrcmp(mtype,MATBAIJ,&isBAIJ);CHKERRQ(ierr);
152   if (!isBAIJ) {ierr = PetscStrcmp(mtype,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);}
153   if (!isBAIJ) {ierr = PetscStrcmp(mtype,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);}
154   if (isBAIJ) {
155     dd->w = 1;
156     dd->xs = dd->xs/nc;
157     dd->xe = dd->xe/nc;
158     dd->Xs = dd->Xs/nc;
159     dd->Xe = dd->Xe/nc;
160   }
161 
162   /*
163      We do not provide a getcoloring function in the DMDA operations because
164    the basic DMDA does not know about matrices. We think of DMDA as being more
165    more low-level then matrices.
166   */
167   if (dim == 1) {
168     ierr = DMGetColoring_DA_1d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
169   } else if (dim == 2) {
170     ierr =  DMGetColoring_DA_2d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
171   } else if (dim == 3) {
172     ierr =  DMGetColoring_DA_3d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
173   } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
174   if (isBAIJ) {
175     dd->w = nc;
176     dd->xs = dd->xs*nc;
177     dd->xe = dd->xe*nc;
178     dd->Xs = dd->Xs*nc;
179     dd->Xe = dd->Xe*nc;
180   }
181   PetscFunctionReturn(0);
182 }
183 
184 /* ---------------------------------------------------------------------------------*/
185 
186 #undef __FUNCT__
187 #define __FUNCT__ "DMGetColoring_DA_2d_MPIAIJ"
188 PetscErrorCode DMGetColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
189 {
190   PetscErrorCode         ierr;
191   PetscInt               xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
192   PetscInt               ncolors;
193   MPI_Comm               comm;
194   DMDABoundaryType       bx,by;
195   DMDAStencilType        st;
196   ISColoringValue        *colors;
197   DM_DA                  *dd = (DM_DA*)da->data;
198 
199   PetscFunctionBegin;
200   /*
201          nc - number of components per grid point
202          col - number of colors needed in one direction for single component problem
203 
204   */
205   ierr = DMDAGetInfo(da,&dim,&m,&n,0,&M,&N,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
206   col    = 2*s + 1;
207   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
208   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
209   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
210 
211   /* special case as taught to us by Paul Hovland */
212   if (st == DMDA_STENCIL_STAR && s == 1) {
213     ierr = DMGetColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
214   } else {
215 
216     if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)){
217       SETERRQ2(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X (%d) is divisible\n\
218                  by 2*stencil_width + 1 (%d)\n", m, col);
219     }
220     if (by == DMDA_BOUNDARY_PERIODIC && (n % col)){
221       SETERRQ2(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y (%d) is divisible\n\
222                  by 2*stencil_width + 1 (%d)\n", n, col);
223     }
224     if (ctype == IS_COLORING_GLOBAL) {
225       if (!dd->localcoloring) {
226 	ierr = PetscMalloc(nc*nx*ny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
227 	ii = 0;
228 	for (j=ys; j<ys+ny; j++) {
229 	  for (i=xs; i<xs+nx; i++) {
230 	    for (k=0; k<nc; k++) {
231 	      colors[ii++] = k + nc*((i % col) + col*(j % col));
232 	    }
233 	  }
234 	}
235         ncolors = nc + nc*(col-1 + col*(col-1));
236 	ierr = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,&dd->localcoloring);CHKERRQ(ierr);
237       }
238       *coloring = dd->localcoloring;
239     } else if (ctype == IS_COLORING_GHOSTED) {
240       if (!dd->ghostedcoloring) {
241 	ierr = PetscMalloc(nc*gnx*gny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
242 	ii = 0;
243 	for (j=gys; j<gys+gny; j++) {
244 	  for (i=gxs; i<gxs+gnx; i++) {
245 	    for (k=0; k<nc; k++) {
246 	      /* the complicated stuff is to handle periodic boundaries */
247 	      colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
248 	    }
249 	  }
250 	}
251         ncolors = nc + nc*(col - 1 + col*(col-1));
252 	ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
253         /* PetscIntView(ncolors,(PetscInt *)colors,0); */
254 
255 	ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
256       }
257       *coloring = dd->ghostedcoloring;
258     } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
259   }
260   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
261   PetscFunctionReturn(0);
262 }
263 
264 /* ---------------------------------------------------------------------------------*/
265 
266 #undef __FUNCT__
267 #define __FUNCT__ "DMGetColoring_DA_3d_MPIAIJ"
268 PetscErrorCode DMGetColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
269 {
270   PetscErrorCode    ierr;
271   PetscInt          xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
272   PetscInt          ncolors;
273   MPI_Comm          comm;
274   DMDABoundaryType  bx,by,bz;
275   DMDAStencilType   st;
276   ISColoringValue   *colors;
277   DM_DA             *dd = (DM_DA*)da->data;
278 
279   PetscFunctionBegin;
280   /*
281          nc - number of components per grid point
282          col - number of colors needed in one direction for single component problem
283 
284   */
285   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
286   col    = 2*s + 1;
287   if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)){
288     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
289                  by 2*stencil_width + 1\n");
290   }
291   if (by == DMDA_BOUNDARY_PERIODIC && (n % col)){
292     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
293                  by 2*stencil_width + 1\n");
294   }
295   if (bz == DMDA_BOUNDARY_PERIODIC && (p % col)){
296     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
297                  by 2*stencil_width + 1\n");
298   }
299 
300   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
301   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
302   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
303 
304   /* create the coloring */
305   if (ctype == IS_COLORING_GLOBAL) {
306     if (!dd->localcoloring) {
307       ierr = PetscMalloc(nc*nx*ny*nz*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
308       ii = 0;
309       for (k=zs; k<zs+nz; k++) {
310         for (j=ys; j<ys+ny; j++) {
311           for (i=xs; i<xs+nx; i++) {
312             for (l=0; l<nc; l++) {
313               colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
314             }
315           }
316         }
317       }
318       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
319       ierr = ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,&dd->localcoloring);CHKERRQ(ierr);
320     }
321     *coloring = dd->localcoloring;
322   } else if (ctype == IS_COLORING_GHOSTED) {
323     if (!dd->ghostedcoloring) {
324       ierr = PetscMalloc(nc*gnx*gny*gnz*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
325       ii = 0;
326       for (k=gzs; k<gzs+gnz; k++) {
327         for (j=gys; j<gys+gny; j++) {
328           for (i=gxs; i<gxs+gnx; i++) {
329             for (l=0; l<nc; l++) {
330               /* the complicated stuff is to handle periodic boundaries */
331               colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
332             }
333           }
334         }
335       }
336       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
337       ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
338       ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
339     }
340     *coloring = dd->ghostedcoloring;
341   } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
342   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
343   PetscFunctionReturn(0);
344 }
345 
346 /* ---------------------------------------------------------------------------------*/
347 
348 #undef __FUNCT__
349 #define __FUNCT__ "DMGetColoring_DA_1d_MPIAIJ"
350 PetscErrorCode DMGetColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
351 {
352   PetscErrorCode    ierr;
353   PetscInt          xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
354   PetscInt          ncolors;
355   MPI_Comm          comm;
356   DMDABoundaryType  bx;
357   ISColoringValue   *colors;
358   DM_DA             *dd = (DM_DA*)da->data;
359 
360   PetscFunctionBegin;
361   /*
362          nc - number of components per grid point
363          col - number of colors needed in one direction for single component problem
364 
365   */
366   ierr = DMDAGetInfo(da,&dim,&m,0,0,&M,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
367   col    = 2*s + 1;
368 
369   if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)) {
370     SETERRQ2(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points %d is divisible\n\
371                  by 2*stencil_width + 1 %d\n",(int)m,(int)col);
372   }
373 
374   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
375   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
376   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
377 
378   /* create the coloring */
379   if (ctype == IS_COLORING_GLOBAL) {
380     if (!dd->localcoloring) {
381       ierr = PetscMalloc(nc*nx*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
382       i1 = 0;
383       for (i=xs; i<xs+nx; i++) {
384         for (l=0; l<nc; l++) {
385           colors[i1++] = l + nc*(i % col);
386         }
387       }
388       ncolors = nc + nc*(col-1);
389       ierr = ISColoringCreate(comm,ncolors,nc*nx,colors,&dd->localcoloring);CHKERRQ(ierr);
390     }
391     *coloring = dd->localcoloring;
392   } else if (ctype == IS_COLORING_GHOSTED) {
393     if (!dd->ghostedcoloring) {
394       ierr = PetscMalloc(nc*gnx*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
395       i1 = 0;
396       for (i=gxs; i<gxs+gnx; i++) {
397         for (l=0; l<nc; l++) {
398           /* the complicated stuff is to handle periodic boundaries */
399           colors[i1++] = l + nc*(SetInRange(i,m) % col);
400         }
401       }
402       ncolors = nc + nc*(col-1);
403       ierr = ISColoringCreate(comm,ncolors,nc*gnx,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
404       ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
405     }
406     *coloring = dd->ghostedcoloring;
407   } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
408   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
409   PetscFunctionReturn(0);
410 }
411 
412 #undef __FUNCT__
413 #define __FUNCT__ "DMGetColoring_DA_2d_5pt_MPIAIJ"
414 PetscErrorCode DMGetColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
415 {
416   PetscErrorCode    ierr;
417   PetscInt          xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
418   PetscInt          ncolors;
419   MPI_Comm          comm;
420   DMDABoundaryType  bx,by;
421   ISColoringValue   *colors;
422   DM_DA             *dd = (DM_DA*)da->data;
423 
424   PetscFunctionBegin;
425   /*
426          nc - number of components per grid point
427          col - number of colors needed in one direction for single component problem
428 
429   */
430   ierr   = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,0);CHKERRQ(ierr);
431   ierr   = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
432   ierr   = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
433   ierr   = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
434 
435   if (bx == DMDA_BOUNDARY_PERIODIC && (m % 5)){
436     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
437                  by 5\n");
438   }
439   if (by == DMDA_BOUNDARY_PERIODIC && (n % 5)){
440     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
441                  by 5\n");
442   }
443 
444   /* create the coloring */
445   if (ctype == IS_COLORING_GLOBAL) {
446     if (!dd->localcoloring) {
447       ierr = PetscMalloc(nc*nx*ny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
448       ii = 0;
449       for (j=ys; j<ys+ny; j++) {
450 	for (i=xs; i<xs+nx; i++) {
451 	  for (k=0; k<nc; k++) {
452 	    colors[ii++] = k + nc*((3*j+i) % 5);
453 	  }
454 	}
455       }
456       ncolors = 5*nc;
457       ierr = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,&dd->localcoloring);CHKERRQ(ierr);
458     }
459     *coloring = dd->localcoloring;
460   } else if (ctype == IS_COLORING_GHOSTED) {
461     if (!dd->ghostedcoloring) {
462       ierr = PetscMalloc(nc*gnx*gny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
463       ii = 0;
464       for (j=gys; j<gys+gny; j++) {
465 	for (i=gxs; i<gxs+gnx; i++) {
466 	  for (k=0; k<nc; k++) {
467 	    colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
468 	  }
469 	}
470       }
471       ncolors = 5*nc;
472       ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
473       ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
474     }
475     *coloring = dd->ghostedcoloring;
476   } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
477   PetscFunctionReturn(0);
478 }
479 
480 /* =========================================================================== */
481 extern PetscErrorCode DMGetMatrix_DA_1d_MPIAIJ(DM,Mat);
482 extern PetscErrorCode DMGetMatrix_DA_2d_MPIAIJ(DM,Mat);
483 extern PetscErrorCode DMGetMatrix_DA_2d_MPIAIJ_Fill(DM,Mat);
484 extern PetscErrorCode DMGetMatrix_DA_3d_MPIAIJ(DM,Mat);
485 extern PetscErrorCode DMGetMatrix_DA_3d_MPIAIJ_Fill(DM,Mat);
486 extern PetscErrorCode DMGetMatrix_DA_2d_MPIBAIJ(DM,Mat);
487 extern PetscErrorCode DMGetMatrix_DA_3d_MPIBAIJ(DM,Mat);
488 extern PetscErrorCode DMGetMatrix_DA_2d_MPISBAIJ(DM,Mat);
489 extern PetscErrorCode DMGetMatrix_DA_3d_MPISBAIJ(DM,Mat);
490 
491 #undef __FUNCT__
492 #define __FUNCT__ "MatSetDM"
493 /*@
494    MatSetDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix
495 
496    Logically Collective on Mat
497 
498    Input Parameters:
499 +  mat - the matrix
500 -  da - the da
501 
502    Level: intermediate
503 
504 @*/
505 PetscErrorCode  MatSetDM(Mat mat,DM da)
506 {
507   PetscErrorCode ierr;
508 
509   PetscFunctionBegin;
510   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
511   PetscValidHeaderSpecific(da,DM_CLASSID,1);
512   ierr = PetscTryMethod(mat,"MatSetDM_C",(Mat,DM),(mat,da));CHKERRQ(ierr);
513   PetscFunctionReturn(0);
514 }
515 
516 EXTERN_C_BEGIN
517 #undef __FUNCT__
518 #define __FUNCT__ "MatView_MPI_DA"
519 PetscErrorCode  MatView_MPI_DA(Mat A,PetscViewer viewer)
520 {
521   DM             da;
522   PetscErrorCode ierr;
523   const char     *prefix;
524   Mat            Anatural;
525   AO             ao;
526   PetscInt       rstart,rend,*petsc,i;
527   IS             is;
528   MPI_Comm       comm;
529 
530   PetscFunctionBegin;
531   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
532   ierr = PetscObjectQuery((PetscObject)A,"DM",(PetscObject*)&da);CHKERRQ(ierr);
533   if (!da) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
534 
535   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
536   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
537   ierr = PetscMalloc((rend-rstart)*sizeof(PetscInt),&petsc);CHKERRQ(ierr);
538   for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
539   ierr = AOApplicationToPetsc(ao,rend-rstart,petsc);CHKERRQ(ierr);
540   ierr = ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
541 
542   /* call viewer on natural ordering */
543   ierr = MatGetSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);CHKERRQ(ierr);
544   ierr = ISDestroy(&is);CHKERRQ(ierr);
545   ierr = PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);CHKERRQ(ierr);
546   ierr = PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);CHKERRQ(ierr);
547   ierr = PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);CHKERRQ(ierr);
548   ierr = MatView(Anatural,viewer);CHKERRQ(ierr);
549   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
550   PetscFunctionReturn(0);
551 }
552 EXTERN_C_END
553 
554 EXTERN_C_BEGIN
555 #undef __FUNCT__
556 #define __FUNCT__ "MatLoad_MPI_DA"
557 PetscErrorCode  MatLoad_MPI_DA(Mat A,PetscViewer viewer)
558 {
559   DM             da;
560   PetscErrorCode ierr;
561   Mat            Anatural,Aapp;
562   AO             ao;
563   PetscInt       rstart,rend,*app,i;
564   IS             is;
565   MPI_Comm       comm;
566 
567   PetscFunctionBegin;
568   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
569   ierr = PetscObjectQuery((PetscObject)A,"DM",(PetscObject*)&da);CHKERRQ(ierr);
570   if (!da) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
571 
572   /* Load the matrix in natural ordering */
573   ierr = MatCreate(((PetscObject)A)->comm,&Anatural);CHKERRQ(ierr);
574   ierr = MatSetType(Anatural,((PetscObject)A)->type_name);CHKERRQ(ierr);
575   ierr = MatSetSizes(Anatural,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
576   ierr = MatLoad(Anatural,viewer);CHKERRQ(ierr);
577 
578   /* Map natural ordering to application ordering and create IS */
579   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
580   ierr = MatGetOwnershipRange(Anatural,&rstart,&rend);CHKERRQ(ierr);
581   ierr = PetscMalloc((rend-rstart)*sizeof(PetscInt),&app);CHKERRQ(ierr);
582   for (i=rstart; i<rend; i++) app[i-rstart] = i;
583   ierr = AOPetscToApplication(ao,rend-rstart,app);CHKERRQ(ierr);
584   ierr = ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
585 
586   /* Do permutation and replace header */
587   ierr = MatGetSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);CHKERRQ(ierr);
588   ierr = MatHeaderReplace(A,Aapp);CHKERRQ(ierr);
589   ierr = ISDestroy(&is);CHKERRQ(ierr);
590   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
591   PetscFunctionReturn(0);
592 }
593 EXTERN_C_END
594 
595 #undef __FUNCT__
596 #define __FUNCT__ "DMGetMatrix_DA"
597 PetscErrorCode  DMGetMatrix_DA(DM da, const MatType mtype,Mat *J)
598 {
599   PetscErrorCode ierr;
600   PetscInt       dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
601   Mat            A;
602   MPI_Comm       comm;
603   const MatType  Atype;
604   void           (*aij)(void)=PETSC_NULL,(*baij)(void)=PETSC_NULL,(*sbaij)(void)=PETSC_NULL;
605   MatType        ttype[256];
606   PetscBool      flg;
607   PetscMPIInt    size;
608   DM_DA          *dd = (DM_DA*)da->data;
609 
610   PetscFunctionBegin;
611 #ifndef PETSC_USE_DYNAMIC_LIBRARIES
612   ierr = MatInitializePackage(PETSC_NULL);CHKERRQ(ierr);
613 #endif
614   if (!mtype) mtype = MATAIJ;
615   ierr = PetscStrcpy((char*)ttype,mtype);CHKERRQ(ierr);
616   ierr = PetscOptionsBegin(((PetscObject)da)->comm,((PetscObject)da)->prefix,"DMDA options","Mat");CHKERRQ(ierr);
617   ierr = PetscOptionsList("-da_mat_type","Matrix type","MatSetType",MatList,mtype,(char*)ttype,256,&flg);CHKERRQ(ierr);
618   ierr = PetscOptionsEnd();
619 
620   /*
621                                   m
622           ------------------------------------------------------
623          |                                                     |
624          |                                                     |
625          |               ----------------------                |
626          |               |                    |                |
627       n  |           ny  |                    |                |
628          |               |                    |                |
629          |               .---------------------                |
630          |             (xs,ys)     nx                          |
631          |            .                                        |
632          |         (gxs,gys)                                   |
633          |                                                     |
634           -----------------------------------------------------
635   */
636 
637   /*
638          nc - number of components per grid point
639          col - number of colors needed in one direction for single component problem
640 
641   */
642   ierr = DMDAGetInfo(da,&dim,&M,&N,&P,0,0,0,&dof,0,0,0,0,0);CHKERRQ(ierr);
643   ierr = DMDAGetCorners(da,0,0,0,&nx,&ny,&nz);CHKERRQ(ierr);
644   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
645   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
646   ierr = MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);CHKERRQ(ierr);
647   ierr = MatSetType(A,(const MatType)ttype);CHKERRQ(ierr);
648   ierr = MatSetDM(A,da);CHKERRQ(ierr);
649   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
650   ierr = MatGetType(A,&Atype);CHKERRQ(ierr);
651   /*
652      We do not provide a getmatrix function in the DMDA operations because
653    the basic DMDA does not know about matrices. We think of DMDA as being more
654    more low-level than matrices. This is kind of cheating but, cause sometimes
655    we think of DMDA has higher level than matrices.
656 
657      We could switch based on Atype (or mtype), but we do not since the
658    specialized setting routines depend only the particular preallocation
659    details of the matrix, not the type itself.
660   */
661   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
662   if (!aij) {
663     ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
664   }
665   if (!aij) {
666     ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
667     if (!baij) {
668       ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
669     }
670     if (!baij){
671       ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
672       if (!sbaij) {
673         ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
674       }
675     }
676   }
677   if (aij) {
678     if (dim == 1) {
679       ierr = DMGetMatrix_DA_1d_MPIAIJ(da,A);CHKERRQ(ierr);
680     } else if (dim == 2) {
681       if (dd->ofill) {
682         ierr = DMGetMatrix_DA_2d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
683       } else {
684         ierr = DMGetMatrix_DA_2d_MPIAIJ(da,A);CHKERRQ(ierr);
685       }
686     } else if (dim == 3) {
687       if (dd->ofill) {
688         ierr = DMGetMatrix_DA_3d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
689       } else {
690         ierr = DMGetMatrix_DA_3d_MPIAIJ(da,A);CHKERRQ(ierr);
691       }
692     }
693   } else if (baij) {
694     if (dim == 2) {
695       ierr = DMGetMatrix_DA_2d_MPIBAIJ(da,A);CHKERRQ(ierr);
696     } else if (dim == 3) {
697       ierr = DMGetMatrix_DA_3d_MPIBAIJ(da,A);CHKERRQ(ierr);
698     } else {
699       SETERRQ3(((PetscObject)da)->comm,PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension!\n" \
700 	       "Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
701     }
702   } else if (sbaij) {
703     if (dim == 2) {
704       ierr = DMGetMatrix_DA_2d_MPISBAIJ(da,A);CHKERRQ(ierr);
705     } else if (dim == 3) {
706       ierr = DMGetMatrix_DA_3d_MPISBAIJ(da,A);CHKERRQ(ierr);
707     } else {
708       SETERRQ3(((PetscObject)da)->comm,PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension!\n" \
709 	       "Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
710     }
711   }
712   ierr = DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);CHKERRQ(ierr);
713   ierr = MatSetStencil(A,dim,dims,starts,dof);CHKERRQ(ierr);
714   ierr = PetscObjectCompose((PetscObject)A,"DM",(PetscObject)da);CHKERRQ(ierr);
715   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
716   if (size > 1) {
717     /* change viewer to display matrix in natural ordering */
718     ierr = MatShellSetOperation(A, MATOP_VIEW, (void (*)(void)) MatView_MPI_DA);CHKERRQ(ierr);
719     ierr = MatShellSetOperation(A, MATOP_LOAD, (void (*)(void)) MatLoad_MPI_DA);CHKERRQ(ierr);
720   }
721   *J = A;
722   PetscFunctionReturn(0);
723 }
724 
725 /* ---------------------------------------------------------------------------------*/
726 #undef __FUNCT__
727 #define __FUNCT__ "DMGetMatrix_DA_2d_MPIAIJ"
728 PetscErrorCode DMGetMatrix_DA_2d_MPIAIJ(DM da,Mat J)
729 {
730   PetscErrorCode         ierr;
731   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = PETSC_NULL,k,nc,*rows = PETSC_NULL,col,cnt,l,p;
732   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
733   MPI_Comm               comm;
734   PetscScalar            *values;
735   DMDABoundaryType       bx,by;
736   ISLocalToGlobalMapping ltog,ltogb;
737   DMDAStencilType        st;
738 
739   PetscFunctionBegin;
740   /*
741          nc - number of components per grid point
742          col - number of colors needed in one direction for single component problem
743 
744   */
745   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
746   col = 2*s + 1;
747   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
748   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
749   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
750 
751   ierr = PetscMalloc2(nc,PetscInt,&rows,col*col*nc*nc,PetscInt,&cols);CHKERRQ(ierr);
752   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
753   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
754 
755   /* determine the matrix preallocation information */
756   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
757   for (i=xs; i<xs+nx; i++) {
758 
759     pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
760     pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
761 
762     for (j=ys; j<ys+ny; j++) {
763       slot = i - gxs + gnx*(j - gys);
764 
765       lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
766       lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
767 
768       cnt  = 0;
769       for (k=0; k<nc; k++) {
770 	for (l=lstart; l<lend+1; l++) {
771 	  for (p=pstart; p<pend+1; p++) {
772 	    if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
773 	      cols[cnt++]  = k + nc*(slot + gnx*l + p);
774 	    }
775 	  }
776 	}
777 	rows[k] = k + nc*(slot);
778       }
779       ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
780     }
781   }
782   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
783   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
784   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
785   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
786 
787   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
788   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
789 
790   /*
791     For each node in the grid: we get the neighbors in the local (on processor ordering
792     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
793     PETSc ordering.
794   */
795   if (!da->prealloc_only) {
796     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
797     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
798     for (i=xs; i<xs+nx; i++) {
799 
800       pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
801       pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
802 
803       for (j=ys; j<ys+ny; j++) {
804 	slot = i - gxs + gnx*(j - gys);
805 
806 	lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
807 	lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
808 
809 	cnt  = 0;
810 	for (k=0; k<nc; k++) {
811 	  for (l=lstart; l<lend+1; l++) {
812 	    for (p=pstart; p<pend+1; p++) {
813 	      if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
814 		cols[cnt++]  = k + nc*(slot + gnx*l + p);
815 	      }
816 	    }
817 	  }
818 	  rows[k]      = k + nc*(slot);
819 	}
820 	ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
821       }
822     }
823     ierr = PetscFree(values);CHKERRQ(ierr);
824     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
825     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
826   }
827   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
828   PetscFunctionReturn(0);
829 }
830 
831 #undef __FUNCT__
832 #define __FUNCT__ "DMGetMatrix_DA_2d_MPIAIJ_Fill"
833 PetscErrorCode DMGetMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)
834 {
835   PetscErrorCode         ierr;
836   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
837   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,l,p;
838   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
839   DM_DA                  *dd = (DM_DA*)da->data;
840   PetscInt               ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
841   MPI_Comm               comm;
842   PetscScalar            *values;
843   DMDABoundaryType       bx,by;
844   ISLocalToGlobalMapping ltog,ltogb;
845   DMDAStencilType        st;
846 
847   PetscFunctionBegin;
848   /*
849          nc - number of components per grid point
850          col - number of colors needed in one direction for single component problem
851 
852   */
853   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
854   col = 2*s + 1;
855   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
856   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
857   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
858 
859   ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
860   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
861   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
862 
863   /* determine the matrix preallocation information */
864   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
865   for (i=xs; i<xs+nx; i++) {
866 
867     pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
868     pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
869 
870     for (j=ys; j<ys+ny; j++) {
871       slot = i - gxs + gnx*(j - gys);
872 
873       lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
874       lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
875 
876       for (k=0; k<nc; k++) {
877         cnt  = 0;
878 	for (l=lstart; l<lend+1; l++) {
879 	  for (p=pstart; p<pend+1; p++) {
880             if (l || p) {
881 	      if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
882                 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++)
883 		  cols[cnt++]  = ofill[ifill_col] + nc*(slot + gnx*l + p);
884 	      }
885             } else {
886 	      if (dfill) {
887 		for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++)
888 		  cols[cnt++]  = dfill[ifill_col] + nc*(slot + gnx*l + p);
889 	      } else {
890 		for (ifill_col=0; ifill_col<nc; ifill_col++)
891 		  cols[cnt++]  = ifill_col + nc*(slot + gnx*l + p);
892 	      }
893             }
894 	  }
895 	}
896 	row = k + nc*(slot);
897         ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
898       }
899     }
900   }
901   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
902   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
903   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
904   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
905   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
906 
907   /*
908     For each node in the grid: we get the neighbors in the local (on processor ordering
909     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
910     PETSc ordering.
911   */
912   if (!da->prealloc_only) {
913     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
914     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
915     for (i=xs; i<xs+nx; i++) {
916 
917       pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
918       pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
919 
920       for (j=ys; j<ys+ny; j++) {
921 	slot = i - gxs + gnx*(j - gys);
922 
923 	lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
924 	lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
925 
926 	for (k=0; k<nc; k++) {
927 	  cnt  = 0;
928 	  for (l=lstart; l<lend+1; l++) {
929 	    for (p=pstart; p<pend+1; p++) {
930 	      if (l || p) {
931 		if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
932 		  for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++)
933 		    cols[cnt++]  = ofill[ifill_col] + nc*(slot + gnx*l + p);
934 		}
935 	      } else {
936 		if (dfill) {
937 		  for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++)
938 		    cols[cnt++]  = dfill[ifill_col] + nc*(slot + gnx*l + p);
939 		} else {
940 		  for (ifill_col=0; ifill_col<nc; ifill_col++)
941 		    cols[cnt++]  = ifill_col + nc*(slot + gnx*l + p);
942 		}
943 	      }
944 	    }
945 	  }
946 	  row  = k + nc*(slot);
947 	  ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
948 	}
949       }
950     }
951     ierr = PetscFree(values);CHKERRQ(ierr);
952     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
953     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
954   }
955   ierr = PetscFree(cols);CHKERRQ(ierr);
956   PetscFunctionReturn(0);
957 }
958 
959 /* ---------------------------------------------------------------------------------*/
960 
961 #undef __FUNCT__
962 #define __FUNCT__ "DMGetMatrix_DA_3d_MPIAIJ"
963 PetscErrorCode DMGetMatrix_DA_3d_MPIAIJ(DM da,Mat J)
964 {
965   PetscErrorCode         ierr;
966   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
967   PetscInt               m,n,dim,s,*cols = PETSC_NULL,k,nc,*rows = PETSC_NULL,col,cnt,l,p,*dnz = PETSC_NULL,*onz = PETSC_NULL;
968   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
969   MPI_Comm               comm;
970   PetscScalar            *values;
971   DMDABoundaryType       bx,by,bz;
972   ISLocalToGlobalMapping ltog,ltogb;
973   DMDAStencilType        st;
974 
975   PetscFunctionBegin;
976   /*
977          nc - number of components per grid point
978          col - number of colors needed in one direction for single component problem
979 
980   */
981   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
982   col    = 2*s + 1;
983 
984   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
985   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
986   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
987 
988   ierr = PetscMalloc2(nc,PetscInt,&rows,col*col*col*nc*nc,PetscInt,&cols);CHKERRQ(ierr);
989   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
990   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
991 
992   /* determine the matrix preallocation information */
993   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
994   for (i=xs; i<xs+nx; i++) {
995     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
996     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
997     for (j=ys; j<ys+ny; j++) {
998       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
999       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1000       for (k=zs; k<zs+nz; k++) {
1001 	kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1002 	kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1003 
1004 	slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1005 
1006 	cnt  = 0;
1007 	for (l=0; l<nc; l++) {
1008 	  for (ii=istart; ii<iend+1; ii++) {
1009 	    for (jj=jstart; jj<jend+1; jj++) {
1010 	      for (kk=kstart; kk<kend+1; kk++) {
1011 		if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1012 		  cols[cnt++]  = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1013 		}
1014 	      }
1015 	    }
1016 	  }
1017 	  rows[l] = l + nc*(slot);
1018 	}
1019 	ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1020       }
1021     }
1022   }
1023   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1024   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1025   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1026   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1027   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1028   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1029 
1030   /*
1031     For each node in the grid: we get the neighbors in the local (on processor ordering
1032     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1033     PETSc ordering.
1034   */
1035   if (!da->prealloc_only) {
1036     ierr = PetscMalloc(col*col*col*nc*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1037     ierr = PetscMemzero(values,col*col*col*nc*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1038     for (i=xs; i<xs+nx; i++) {
1039       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1040       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1041       for (j=ys; j<ys+ny; j++) {
1042 	jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1043 	jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1044 	for (k=zs; k<zs+nz; k++) {
1045 	  kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1046 	  kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1047 
1048 	  slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1049 
1050 	  cnt  = 0;
1051 	  for (l=0; l<nc; l++) {
1052 	    for (ii=istart; ii<iend+1; ii++) {
1053 	      for (jj=jstart; jj<jend+1; jj++) {
1054 		for (kk=kstart; kk<kend+1; kk++) {
1055 		  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1056 		    cols[cnt++]  = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1057 		  }
1058 		}
1059 	      }
1060 	    }
1061 	    rows[l]      = l + nc*(slot);
1062 	  }
1063 	  ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1064 	}
1065       }
1066     }
1067     ierr = PetscFree(values);CHKERRQ(ierr);
1068     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1069     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1070   }
1071   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1072   PetscFunctionReturn(0);
1073 }
1074 
1075 /* ---------------------------------------------------------------------------------*/
1076 
1077 #undef __FUNCT__
1078 #define __FUNCT__ "DMGetMatrix_DA_1d_MPIAIJ"
1079 PetscErrorCode DMGetMatrix_DA_1d_MPIAIJ(DM da,Mat J)
1080 {
1081   PetscErrorCode         ierr;
1082   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1083   PetscInt               m,dim,s,*cols = PETSC_NULL,nc,*rows = PETSC_NULL,col,cnt,l;
1084   PetscInt               istart,iend;
1085   PetscScalar            *values;
1086   DMDABoundaryType       bx;
1087   ISLocalToGlobalMapping ltog,ltogb;
1088 
1089   PetscFunctionBegin;
1090   /*
1091          nc - number of components per grid point
1092          col - number of colors needed in one direction for single component problem
1093 
1094   */
1095   ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
1096   col    = 2*s + 1;
1097 
1098   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
1099   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
1100 
1101   ierr = MatSeqAIJSetPreallocation(J,col*nc,0);CHKERRQ(ierr);
1102   ierr = MatMPIAIJSetPreallocation(J,col*nc,0,col*nc,0);CHKERRQ(ierr);
1103   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1104   ierr = PetscMalloc2(nc,PetscInt,&rows,col*nc*nc,PetscInt,&cols);CHKERRQ(ierr);
1105 
1106   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1107   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1108   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1109   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1110 
1111   /*
1112     For each node in the grid: we get the neighbors in the local (on processor ordering
1113     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1114     PETSc ordering.
1115   */
1116   if (!da->prealloc_only) {
1117     ierr = PetscMalloc(col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1118     ierr = PetscMemzero(values,col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1119     for (i=xs; i<xs+nx; i++) {
1120       istart = PetscMax(-s,gxs - i);
1121       iend   = PetscMin(s,gxs + gnx - i - 1);
1122       slot   = i - gxs;
1123 
1124       cnt  = 0;
1125       for (l=0; l<nc; l++) {
1126 	for (i1=istart; i1<iend+1; i1++) {
1127 	  cols[cnt++] = l + nc*(slot + i1);
1128 	}
1129 	rows[l]      = l + nc*(slot);
1130       }
1131       ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1132     }
1133     ierr = PetscFree(values);CHKERRQ(ierr);
1134     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1135     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1136   }
1137   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1138   PetscFunctionReturn(0);
1139 }
1140 
1141 #undef __FUNCT__
1142 #define __FUNCT__ "DMGetMatrix_DA_2d_MPIBAIJ"
1143 PetscErrorCode DMGetMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
1144 {
1145   PetscErrorCode         ierr;
1146   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1147   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1148   PetscInt               istart,iend,jstart,jend,ii,jj;
1149   MPI_Comm               comm;
1150   PetscScalar            *values;
1151   DMDABoundaryType       bx,by;
1152   DMDAStencilType        st;
1153   ISLocalToGlobalMapping ltog,ltogb;
1154 
1155   PetscFunctionBegin;
1156   /*
1157      nc - number of components per grid point
1158      col - number of colors needed in one direction for single component problem
1159   */
1160   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1161   col = 2*s + 1;
1162 
1163   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1164   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1165   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1166 
1167   ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1168 
1169   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1170   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1171 
1172   /* determine the matrix preallocation information */
1173   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1174   for (i=xs; i<xs+nx; i++) {
1175     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1176     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1177     for (j=ys; j<ys+ny; j++) {
1178       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1179       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1180       slot = i - gxs + gnx*(j - gys);
1181 
1182       /* Find block columns in block row */
1183       cnt  = 0;
1184       for (ii=istart; ii<iend+1; ii++) {
1185         for (jj=jstart; jj<jend+1; jj++) {
1186           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1187             cols[cnt++]  = slot + ii + gnx*jj;
1188           }
1189         }
1190       }
1191       ierr = MatPreallocateSetLocal(ltogb,1,&slot,ltogb,cnt,cols,dnz,onz);CHKERRQ(ierr);
1192     }
1193   }
1194   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1195   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1196   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1197 
1198   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1199   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1200 
1201   /*
1202     For each node in the grid: we get the neighbors in the local (on processor ordering
1203     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1204     PETSc ordering.
1205   */
1206   if (!da->prealloc_only) {
1207     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1208     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1209     for (i=xs; i<xs+nx; i++) {
1210       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1211       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1212       for (j=ys; j<ys+ny; j++) {
1213 	jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1214 	jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1215 	slot = i - gxs + gnx*(j - gys);
1216 	cnt  = 0;
1217         for (ii=istart; ii<iend+1; ii++) {
1218           for (jj=jstart; jj<jend+1; jj++) {
1219             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1220               cols[cnt++]  = slot + ii + gnx*jj;
1221             }
1222           }
1223         }
1224 	ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1225       }
1226     }
1227     ierr = PetscFree(values);CHKERRQ(ierr);
1228     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1229     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1230   }
1231   ierr = PetscFree(cols);CHKERRQ(ierr);
1232   PetscFunctionReturn(0);
1233 }
1234 
1235 #undef __FUNCT__
1236 #define __FUNCT__ "DMGetMatrix_DA_3d_MPIBAIJ"
1237 PetscErrorCode DMGetMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
1238 {
1239   PetscErrorCode         ierr;
1240   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1241   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1242   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1243   MPI_Comm               comm;
1244   PetscScalar            *values;
1245   DMDABoundaryType       bx,by,bz;
1246   DMDAStencilType        st;
1247   ISLocalToGlobalMapping ltog,ltogb;
1248 
1249   PetscFunctionBegin;
1250   /*
1251          nc - number of components per grid point
1252          col - number of colors needed in one direction for single component problem
1253 
1254   */
1255   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1256   col    = 2*s + 1;
1257 
1258   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1259   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1260   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1261 
1262   ierr  = PetscMalloc(col*col*col*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1263 
1264   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1265   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1266 
1267   /* determine the matrix preallocation information */
1268   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1269   for (i=xs; i<xs+nx; i++) {
1270     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1271     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1272     for (j=ys; j<ys+ny; j++) {
1273       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1274       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1275       for (k=zs; k<zs+nz; k++) {
1276 	kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1277 	kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1278 
1279 	slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1280 
1281 	/* Find block columns in block row */
1282 	cnt  = 0;
1283         for (ii=istart; ii<iend+1; ii++) {
1284           for (jj=jstart; jj<jend+1; jj++) {
1285             for (kk=kstart; kk<kend+1; kk++) {
1286               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1287 		cols[cnt++]  = slot + ii + gnx*jj + gnx*gny*kk;
1288 	      }
1289 	    }
1290 	  }
1291 	}
1292 	ierr = MatPreallocateSetLocal(ltogb,1,&slot,ltogb,cnt,cols,dnz,onz);CHKERRQ(ierr);
1293       }
1294     }
1295   }
1296   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1297   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1298   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1299 
1300   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1301   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1302 
1303   /*
1304     For each node in the grid: we get the neighbors in the local (on processor ordering
1305     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1306     PETSc ordering.
1307   */
1308   if (!da->prealloc_only) {
1309     ierr  = PetscMalloc(col*col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1310     ierr  = PetscMemzero(values,col*col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1311     for (i=xs; i<xs+nx; i++) {
1312       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1313       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1314       for (j=ys; j<ys+ny; j++) {
1315 	jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1316 	jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1317 	for (k=zs; k<zs+nz; k++) {
1318 	  kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1319 	  kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1320 
1321 	  slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1322 
1323 	  cnt  = 0;
1324           for (ii=istart; ii<iend+1; ii++) {
1325             for (jj=jstart; jj<jend+1; jj++) {
1326               for (kk=kstart; kk<kend+1; kk++) {
1327                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1328                   cols[cnt++]  = slot + ii + gnx*jj + gnx*gny*kk;
1329                 }
1330               }
1331             }
1332           }
1333 	  ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1334 	}
1335       }
1336     }
1337     ierr = PetscFree(values);CHKERRQ(ierr);
1338     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1339     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1340   }
1341   ierr = PetscFree(cols);CHKERRQ(ierr);
1342   PetscFunctionReturn(0);
1343 }
1344 
1345 #undef __FUNCT__
1346 #define __FUNCT__ "L2GFilterUpperTriangular"
1347 /*
1348   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
1349   identify in the local ordering with periodic domain.
1350 */
1351 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[])
1352 {
1353   PetscErrorCode ierr;
1354   PetscInt       i,n;
1355 
1356   PetscFunctionBegin;
1357   ierr = ISLocalToGlobalMappingApply(ltog,1,row,row);CHKERRQ(ierr);
1358   ierr = ISLocalToGlobalMappingApply(ltog,*cnt,col,col);CHKERRQ(ierr);
1359   for (i=0,n=0; i<*cnt; i++) {
1360     if (col[i] >= *row) col[n++] = col[i];
1361   }
1362   *cnt = n;
1363   PetscFunctionReturn(0);
1364 }
1365 
1366 #undef __FUNCT__
1367 #define __FUNCT__ "DMGetMatrix_DA_2d_MPISBAIJ"
1368 PetscErrorCode DMGetMatrix_DA_2d_MPISBAIJ(DM da,Mat J)
1369 {
1370   PetscErrorCode         ierr;
1371   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1372   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1373   PetscInt               istart,iend,jstart,jend,ii,jj;
1374   MPI_Comm               comm;
1375   PetscScalar            *values;
1376   DMDABoundaryType       bx,by;
1377   DMDAStencilType        st;
1378   ISLocalToGlobalMapping ltog,ltogb;
1379 
1380   PetscFunctionBegin;
1381   /*
1382      nc - number of components per grid point
1383      col - number of colors needed in one direction for single component problem
1384   */
1385   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1386   col = 2*s + 1;
1387 
1388   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1389   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1390   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1391 
1392   ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1393 
1394   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1395   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1396 
1397   /* determine the matrix preallocation information */
1398   ierr = MatPreallocateSymmetricInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1399   for (i=xs; i<xs+nx; i++) {
1400     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1401     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1402     for (j=ys; j<ys+ny; j++) {
1403       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1404       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1405       slot = i - gxs + gnx*(j - gys);
1406 
1407       /* Find block columns in block row */
1408       cnt  = 0;
1409       for (ii=istart; ii<iend+1; ii++) {
1410         for (jj=jstart; jj<jend+1; jj++) {
1411           if (st == DMDA_STENCIL_BOX || !ii || !jj) {
1412             cols[cnt++]  = slot + ii + gnx*jj;
1413           }
1414         }
1415       }
1416       ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1417       ierr = MatPreallocateSymmetricSet(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
1418     }
1419   }
1420   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1421   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1422   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1423 
1424   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1425   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1426 
1427   /*
1428     For each node in the grid: we get the neighbors in the local (on processor ordering
1429     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1430     PETSc ordering.
1431   */
1432   if (!da->prealloc_only) {
1433     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1434     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1435     for (i=xs; i<xs+nx; i++) {
1436       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1437       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1438       for (j=ys; j<ys+ny; j++) {
1439         jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1440         jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1441         slot = i - gxs + gnx*(j - gys);
1442 
1443         /* Find block columns in block row */
1444         cnt  = 0;
1445         for (ii=istart; ii<iend+1; ii++) {
1446           for (jj=jstart; jj<jend+1; jj++) {
1447             if (st == DMDA_STENCIL_BOX || !ii || !jj) {
1448               cols[cnt++]  = slot + ii + gnx*jj;
1449             }
1450           }
1451         }
1452         ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1453 	ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1454       }
1455     }
1456     ierr = PetscFree(values);CHKERRQ(ierr);
1457     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1458     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1459   }
1460   ierr = PetscFree(cols);CHKERRQ(ierr);
1461   PetscFunctionReturn(0);
1462 }
1463 
1464 #undef __FUNCT__
1465 #define __FUNCT__ "DMGetMatrix_DA_3d_MPISBAIJ"
1466 PetscErrorCode DMGetMatrix_DA_3d_MPISBAIJ(DM da,Mat J)
1467 {
1468   PetscErrorCode         ierr;
1469   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1470   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1471   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1472   MPI_Comm               comm;
1473   PetscScalar            *values;
1474   DMDABoundaryType       bx,by,bz;
1475   DMDAStencilType        st;
1476   ISLocalToGlobalMapping ltog,ltogb;
1477 
1478   PetscFunctionBegin;
1479   /*
1480      nc - number of components per grid point
1481      col - number of colors needed in one direction for single component problem
1482   */
1483   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1484   col = 2*s + 1;
1485 
1486   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1487   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1488   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1489 
1490   /* create the matrix */
1491   ierr = PetscMalloc(col*col*col*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1492 
1493   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1494   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1495 
1496   /* determine the matrix preallocation information */
1497   ierr = MatPreallocateSymmetricInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1498   for (i=xs; i<xs+nx; i++) {
1499     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1500     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1501     for (j=ys; j<ys+ny; j++) {
1502       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1503       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1504       for (k=zs; k<zs+nz; k++) {
1505         kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1506 	kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1507 
1508 	slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1509 
1510 	/* Find block columns in block row */
1511 	cnt  = 0;
1512         for (ii=istart; ii<iend+1; ii++) {
1513           for (jj=jstart; jj<jend+1; jj++) {
1514             for (kk=kstart; kk<kend+1; kk++) {
1515               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
1516                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1517               }
1518             }
1519           }
1520         }
1521         ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1522         ierr = MatPreallocateSymmetricSet(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
1523       }
1524     }
1525   }
1526   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1527   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1528   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1529 
1530   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1531   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1532 
1533   /*
1534     For each node in the grid: we get the neighbors in the local (on processor ordering
1535     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1536     PETSc ordering.
1537   */
1538   if (!da->prealloc_only) {
1539     ierr = PetscMalloc(col*col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1540     ierr = PetscMemzero(values,col*col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1541     for (i=xs; i<xs+nx; i++) {
1542       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1543       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1544       for (j=ys; j<ys+ny; j++) {
1545         jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1546 	jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1547 	for (k=zs; k<zs+nz; k++) {
1548           kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1549 	  kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1550 
1551 	  slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1552 
1553 	  cnt  = 0;
1554           for (ii=istart; ii<iend+1; ii++) {
1555             for (jj=jstart; jj<jend+1; jj++) {
1556               for (kk=kstart; kk<kend+1; kk++) {
1557                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
1558 		  cols[cnt++]  = slot + ii + gnx*jj + gnx*gny*kk;
1559 		}
1560 	      }
1561 	    }
1562 	  }
1563           ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1564           ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1565 	}
1566       }
1567     }
1568     ierr = PetscFree(values);CHKERRQ(ierr);
1569     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1570     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1571   }
1572   ierr = PetscFree(cols);CHKERRQ(ierr);
1573   PetscFunctionReturn(0);
1574 }
1575 
1576 /* ---------------------------------------------------------------------------------*/
1577 
1578 #undef __FUNCT__
1579 #define __FUNCT__ "DMGetMatrix_DA_3d_MPIAIJ_Fill"
1580 PetscErrorCode DMGetMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J)
1581 {
1582   PetscErrorCode         ierr;
1583   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1584   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,l,p,*dnz,*onz;
1585   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1586   DM_DA                  *dd = (DM_DA*)da->data;
1587   PetscInt               ifill_col,*dfill = dd->dfill,*ofill = dd->ofill;
1588   MPI_Comm               comm;
1589   PetscScalar            *values;
1590   DMDABoundaryType       bx,by,bz;
1591   ISLocalToGlobalMapping ltog,ltogb;
1592   DMDAStencilType        st;
1593 
1594   PetscFunctionBegin;
1595   /*
1596          nc - number of components per grid point
1597          col - number of colors needed in one direction for single component problem
1598 
1599   */
1600   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1601   col    = 2*s + 1;
1602   if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)){
1603     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
1604                  by 2*stencil_width + 1\n");
1605   }
1606   if (by == DMDA_BOUNDARY_PERIODIC && (n % col)){
1607     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
1608                  by 2*stencil_width + 1\n");
1609   }
1610   if (bz == DMDA_BOUNDARY_PERIODIC && (p % col)){
1611     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
1612                  by 2*stencil_width + 1\n");
1613   }
1614 
1615   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1616   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1617   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1618 
1619   ierr = PetscMalloc(col*col*col*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1620   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1621   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1622 
1623   /* determine the matrix preallocation information */
1624   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1625 
1626 
1627   for (i=xs; i<xs+nx; i++) {
1628     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1629     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1630     for (j=ys; j<ys+ny; j++) {
1631       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1632       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1633       for (k=zs; k<zs+nz; k++) {
1634         kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1635         kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1636 
1637         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1638 
1639 	for (l=0; l<nc; l++) {
1640 	  cnt  = 0;
1641 	  for (ii=istart; ii<iend+1; ii++) {
1642 	    for (jj=jstart; jj<jend+1; jj++) {
1643 	      for (kk=kstart; kk<kend+1; kk++) {
1644 		if (ii || jj || kk) {
1645 		  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1646 		    for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++)
1647 		      cols[cnt++]  = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1648 		  }
1649 		} else {
1650 		  if (dfill) {
1651 		    for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++)
1652 		      cols[cnt++]  = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1653 		  } else {
1654 		    for (ifill_col=0; ifill_col<nc; ifill_col++)
1655 		      cols[cnt++]  = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1656 		  }
1657 		}
1658 	      }
1659 	    }
1660 	  }
1661 	  row  = l + nc*(slot);
1662 	  ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1663 	}
1664       }
1665     }
1666   }
1667   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1668   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1669   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1670   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1671   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1672 
1673   /*
1674     For each node in the grid: we get the neighbors in the local (on processor ordering
1675     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1676     PETSc ordering.
1677   */
1678   if (!da->prealloc_only) {
1679     ierr = PetscMalloc(col*col*col*nc*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1680     ierr = PetscMemzero(values,col*col*col*nc*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1681     for (i=xs; i<xs+nx; i++) {
1682       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1683       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1684       for (j=ys; j<ys+ny; j++) {
1685 	jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1686 	jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1687 	for (k=zs; k<zs+nz; k++) {
1688 	  kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1689 	  kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1690 
1691 	  slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1692 
1693 	  for (l=0; l<nc; l++) {
1694 	    cnt  = 0;
1695 	    for (ii=istart; ii<iend+1; ii++) {
1696 	      for (jj=jstart; jj<jend+1; jj++) {
1697 		for (kk=kstart; kk<kend+1; kk++) {
1698 		  if (ii || jj || kk) {
1699 		    if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1700 		      for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++)
1701 			cols[cnt++]  = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1702 		    }
1703 		  } else {
1704 		    if (dfill) {
1705 		      for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++)
1706 			cols[cnt++]  = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1707 		    } else {
1708 		      for (ifill_col=0; ifill_col<nc; ifill_col++)
1709 			cols[cnt++]  = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1710 		    }
1711 		  }
1712 		}
1713 	      }
1714 	    }
1715 	    row  = l + nc*(slot);
1716 	    ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1717 	  }
1718 	}
1719       }
1720     }
1721     ierr = PetscFree(values);CHKERRQ(ierr);
1722     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1723     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1724   }
1725   ierr = PetscFree(cols);CHKERRQ(ierr);
1726   PetscFunctionReturn(0);
1727 }
1728