xref: /petsc/src/dm/impls/da/fdda.c (revision 520db06cfec378d7c34fb86a68534e48a6477cdf)
1 #define PETSCDM_DLL
2 
3 #include "private/daimpl.h" /*I      "petscdm.h"     I*/
4 #include "petscmat.h"         /*I      "petscmat.h"    I*/
5 #include "private/matimpl.h"
6 
7 EXTERN PetscErrorCode DMGetColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring *);
8 EXTERN PetscErrorCode DMGetColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring *);
9 EXTERN PetscErrorCode DMGetColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring *);
10 EXTERN PetscErrorCode DMGetColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring *);
11 
12 /*
13    For ghost i that may be negative or greater than the upper bound this
14   maps it into the 0:m-1 range using periodicity
15 */
16 #define SetInRange(i,m) ((i < 0) ? m+i:((i >= m) ? i-m:i))
17 
18 #undef __FUNCT__
19 #define __FUNCT__ "DMDASetBlockFills_Private"
20 static PetscErrorCode DMDASetBlockFills_Private(PetscInt *dfill,PetscInt w,PetscInt **rfill)
21 {
22   PetscErrorCode ierr;
23   PetscInt       i,j,nz,*fill;
24 
25   PetscFunctionBegin;
26   if (!dfill) PetscFunctionReturn(0);
27 
28   /* count number nonzeros */
29   nz = 0;
30   for (i=0; i<w; i++) {
31     for (j=0; j<w; j++) {
32       if (dfill[w*i+j]) nz++;
33     }
34   }
35   ierr = PetscMalloc((nz + w + 1)*sizeof(PetscInt),&fill);CHKERRQ(ierr);
36   /* construct modified CSR storage of nonzero structure */
37   nz = w + 1;
38   for (i=0; i<w; i++) {
39     fill[i] = nz;
40     for (j=0; j<w; j++) {
41       if (dfill[w*i+j]) {
42 	fill[nz] = j;
43 	nz++;
44       }
45     }
46   }
47   fill[w] = nz;
48 
49   *rfill = fill;
50   PetscFunctionReturn(0);
51 }
52 
53 #undef __FUNCT__
54 #define __FUNCT__ "DMDASetMatPreallocateOnly"
55 /*@
56     DMDASetMatPreallocateOnly - When DMGetMatrix() is called the matrix will be properly
57        preallocated but the nonzero structure and zero values will not be set.
58 
59     Logically Collective on DMDA
60 
61     Input Parameter:
62 +   da - the distributed array
63 -   only - PETSC_TRUE if only want preallocation
64 
65 
66     Level: developer
67 
68 .seealso DMGetMatrix(), DMDASetGetMatrix(), DMDASetBlockSize(), DMDASetBlockFills()
69 
70 @*/
71 PetscErrorCode PETSCDM_DLLEXPORT DMDASetMatPreallocateOnly(DM da,PetscBool  only)
72 {
73   DM_DA *dd = (DM_DA*)da->data;
74 
75   PetscFunctionBegin;
76   dd->prealloc_only = only;
77   PetscFunctionReturn(0);
78 }
79 
80 #undef __FUNCT__
81 #define __FUNCT__ "DMDASetBlockFills"
82 /*@
83     DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
84     of the matrix returned by DMGetMatrix().
85 
86     Logically Collective on DMDA
87 
88     Input Parameter:
89 +   da - the distributed array
90 .   dfill - the fill pattern in the diagonal block (may be PETSC_NULL, means use dense block)
91 -   ofill - the fill pattern in the off-diagonal blocks
92 
93 
94     Level: developer
95 
96     Notes: This only makes sense when you are doing multicomponent problems but using the
97        MPIAIJ matrix format
98 
99            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
100        representing coupling and 0 entries for missing coupling. For example
101 $             dfill[9] = {1, 0, 0,
102 $                         1, 1, 0,
103 $                         0, 1, 1}
104        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
105        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
106        diagonal block).
107 
108      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
109      can be represented in the dfill, ofill format
110 
111    Contributed by Glenn Hammond
112 
113 .seealso DMGetMatrix(), DMDASetGetMatrix(), DMDASetMatPreallocateOnly()
114 
115 @*/
116 PetscErrorCode PETSCDM_DLLEXPORT DMDASetBlockFills(DM da,PetscInt *dfill,PetscInt *ofill)
117 {
118   DM_DA          *dd = (DM_DA*)da->data;
119   PetscErrorCode ierr;
120 
121   PetscFunctionBegin;
122   ierr = DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);CHKERRQ(ierr);
123   ierr = DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);CHKERRQ(ierr);
124   PetscFunctionReturn(0);
125 }
126 
127 
128 #undef __FUNCT__
129 #define __FUNCT__ "DMGetColoring_DA"
130 PetscErrorCode PETSCDM_DLLEXPORT DMGetColoring_DA(DM da,ISColoringType ctype,const MatType mtype,ISColoring *coloring)
131 {
132   PetscErrorCode   ierr;
133   PetscInt         dim,m,n,p,nc;
134   DMDAPeriodicType wrap;
135   MPI_Comm         comm;
136   PetscMPIInt      size;
137   PetscBool        isBAIJ;
138   DM_DA            *dd = (DM_DA*)da->data;
139 
140   PetscFunctionBegin;
141   /*
142                                   m
143           ------------------------------------------------------
144          |                                                     |
145          |                                                     |
146          |               ----------------------                |
147          |               |                    |                |
148       n  |           yn  |                    |                |
149          |               |                    |                |
150          |               .---------------------                |
151          |             (xs,ys)     xn                          |
152          |            .                                        |
153          |         (gxs,gys)                                   |
154          |                                                     |
155           -----------------------------------------------------
156   */
157 
158   /*
159          nc - number of components per grid point
160          col - number of colors needed in one direction for single component problem
161 
162   */
163   ierr = DMDAGetInfo(da,&dim,0,0,0,&m,&n,&p,&nc,0,&wrap,0);CHKERRQ(ierr);
164 
165   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
166   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
167   if (ctype == IS_COLORING_GHOSTED){
168     if (size == 1) {
169       ctype = IS_COLORING_GLOBAL;
170     } else if (dim > 1){
171       if ((m==1 && DMDAXPeriodic(wrap)) || (n==1 && DMDAYPeriodic(wrap)) || (p==1 && DMDAZPeriodic(wrap))){
172         SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"IS_COLORING_GHOSTED cannot be used for periodic boundary condition having both ends of the domain  on the same process");
173       }
174     }
175   }
176 
177   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
178      matrices is for the blocks, not the individual matrix elements  */
179   ierr = PetscStrcmp(mtype,MATBAIJ,&isBAIJ);CHKERRQ(ierr);
180   if (!isBAIJ) {ierr = PetscStrcmp(mtype,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);}
181   if (!isBAIJ) {ierr = PetscStrcmp(mtype,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);}
182   if (isBAIJ) {
183     dd->w = 1;
184     dd->xs = dd->xs/nc;
185     dd->xe = dd->xe/nc;
186     dd->Xs = dd->Xs/nc;
187     dd->Xe = dd->Xe/nc;
188   }
189 
190   /*
191      We do not provide a getcoloring function in the DMDA operations because
192    the basic DMDA does not know about matrices. We think of DMDA as being more
193    more low-level then matrices.
194   */
195   if (dim == 1) {
196     ierr = DMGetColoring_DA_1d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
197   } else if (dim == 2) {
198     ierr =  DMGetColoring_DA_2d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
199   } else if (dim == 3) {
200     ierr =  DMGetColoring_DA_3d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
201   } else {
202     SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
203   }
204   if (isBAIJ) {
205     dd->w = nc;
206     dd->xs = dd->xs*nc;
207     dd->xe = dd->xe*nc;
208     dd->Xs = dd->Xs*nc;
209     dd->Xe = dd->Xe*nc;
210   }
211   PetscFunctionReturn(0);
212 }
213 
214 /* ---------------------------------------------------------------------------------*/
215 
216 #undef __FUNCT__
217 #define __FUNCT__ "DMGetColoring_DA_2d_MPIAIJ"
218 PetscErrorCode DMGetColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
219 {
220   PetscErrorCode         ierr;
221   PetscInt               xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
222   PetscInt               ncolors;
223   MPI_Comm               comm;
224   DMDAPeriodicType       wrap;
225   DMDAStencilType        st;
226   ISColoringValue        *colors;
227   DM_DA                  *dd = (DM_DA*)da->data;
228 
229   PetscFunctionBegin;
230   /*
231          nc - number of components per grid point
232          col - number of colors needed in one direction for single component problem
233 
234   */
235   ierr = DMDAGetInfo(da,&dim,&m,&n,0,&M,&N,0,&nc,&s,&wrap,&st);CHKERRQ(ierr);
236   col    = 2*s + 1;
237   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
238   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
239   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
240 
241   /* special case as taught to us by Paul Hovland */
242   if (st == DMDA_STENCIL_STAR && s == 1) {
243     ierr = DMGetColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
244   } else {
245 
246     if (DMDAXPeriodic(wrap) && (m % col)){
247       SETERRQ2(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X (%d) is divisible\n\
248                  by 2*stencil_width + 1 (%d)\n", m, col);
249     }
250     if (DMDAYPeriodic(wrap) && (n % col)){
251       SETERRQ2(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y (%d) is divisible\n\
252                  by 2*stencil_width + 1 (%d)\n", n, col);
253     }
254     if (ctype == IS_COLORING_GLOBAL) {
255       if (!dd->localcoloring) {
256 	ierr = PetscMalloc(nc*nx*ny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
257 	ii = 0;
258 	for (j=ys; j<ys+ny; j++) {
259 	  for (i=xs; i<xs+nx; i++) {
260 	    for (k=0; k<nc; k++) {
261 	      colors[ii++] = k + nc*((i % col) + col*(j % col));
262 	    }
263 	  }
264 	}
265         ncolors = nc + nc*(col-1 + col*(col-1));
266 	ierr = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,&dd->localcoloring);CHKERRQ(ierr);
267       }
268       *coloring = dd->localcoloring;
269     } else if (ctype == IS_COLORING_GHOSTED) {
270       if (!dd->ghostedcoloring) {
271 	ierr = PetscMalloc(nc*gnx*gny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
272 	ii = 0;
273 	for (j=gys; j<gys+gny; j++) {
274 	  for (i=gxs; i<gxs+gnx; i++) {
275 	    for (k=0; k<nc; k++) {
276 	      /* the complicated stuff is to handle periodic boundaries */
277 	      colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
278 	    }
279 	  }
280 	}
281         ncolors = nc + nc*(col - 1 + col*(col-1));
282 	ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
283         /* PetscIntView(ncolors,(PetscInt *)colors,0); */
284 
285 	ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
286       }
287       *coloring = dd->ghostedcoloring;
288     } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
289   }
290   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
291   PetscFunctionReturn(0);
292 }
293 
294 /* ---------------------------------------------------------------------------------*/
295 
296 #undef __FUNCT__
297 #define __FUNCT__ "DMGetColoring_DA_3d_MPIAIJ"
298 PetscErrorCode DMGetColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
299 {
300   PetscErrorCode    ierr;
301   PetscInt          xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
302   PetscInt          ncolors;
303   MPI_Comm          comm;
304   DMDAPeriodicType  wrap;
305   DMDAStencilType   st;
306   ISColoringValue   *colors;
307   DM_DA             *dd = (DM_DA*)da->data;
308 
309   PetscFunctionBegin;
310   /*
311          nc - number of components per grid point
312          col - number of colors needed in one direction for single component problem
313 
314   */
315   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&wrap,&st);CHKERRQ(ierr);
316   col    = 2*s + 1;
317   if (DMDAXPeriodic(wrap) && (m % col)){
318     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
319                  by 2*stencil_width + 1\n");
320   }
321   if (DMDAYPeriodic(wrap) && (n % col)){
322     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
323                  by 2*stencil_width + 1\n");
324   }
325   if (DMDAZPeriodic(wrap) && (p % col)){
326     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
327                  by 2*stencil_width + 1\n");
328   }
329 
330   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
331   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
332   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
333 
334   /* create the coloring */
335   if (ctype == IS_COLORING_GLOBAL) {
336     if (!dd->localcoloring) {
337       ierr = PetscMalloc(nc*nx*ny*nz*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
338       ii = 0;
339       for (k=zs; k<zs+nz; k++) {
340         for (j=ys; j<ys+ny; j++) {
341           for (i=xs; i<xs+nx; i++) {
342             for (l=0; l<nc; l++) {
343               colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
344             }
345           }
346         }
347       }
348       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
349       ierr = ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,&dd->localcoloring);CHKERRQ(ierr);
350     }
351     *coloring = dd->localcoloring;
352   } else if (ctype == IS_COLORING_GHOSTED) {
353     if (!dd->ghostedcoloring) {
354       ierr = PetscMalloc(nc*gnx*gny*gnz*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
355       ii = 0;
356       for (k=gzs; k<gzs+gnz; k++) {
357         for (j=gys; j<gys+gny; j++) {
358           for (i=gxs; i<gxs+gnx; i++) {
359             for (l=0; l<nc; l++) {
360               /* the complicated stuff is to handle periodic boundaries */
361               colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
362             }
363           }
364         }
365       }
366       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
367       ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
368       ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
369     }
370     *coloring = dd->ghostedcoloring;
371   } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
372   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
373   PetscFunctionReturn(0);
374 }
375 
376 /* ---------------------------------------------------------------------------------*/
377 
378 #undef __FUNCT__
379 #define __FUNCT__ "DMGetColoring_DA_1d_MPIAIJ"
380 PetscErrorCode DMGetColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
381 {
382   PetscErrorCode    ierr;
383   PetscInt          xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
384   PetscInt          ncolors;
385   MPI_Comm          comm;
386   DMDAPeriodicType  wrap;
387   ISColoringValue   *colors;
388   DM_DA             *dd = (DM_DA*)da->data;
389 
390   PetscFunctionBegin;
391   /*
392          nc - number of components per grid point
393          col - number of colors needed in one direction for single component problem
394 
395   */
396   ierr = DMDAGetInfo(da,&dim,&m,0,0,&M,0,0,&nc,&s,&wrap,0);CHKERRQ(ierr);
397   col    = 2*s + 1;
398 
399   if (DMDAXPeriodic(wrap) && (m % col)) {
400     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points is divisible\n\
401                  by 2*stencil_width + 1\n");
402   }
403 
404   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
405   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
406   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
407 
408   /* create the coloring */
409   if (ctype == IS_COLORING_GLOBAL) {
410     if (!dd->localcoloring) {
411       ierr = PetscMalloc(nc*nx*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
412       i1 = 0;
413       for (i=xs; i<xs+nx; i++) {
414         for (l=0; l<nc; l++) {
415           colors[i1++] = l + nc*(i % col);
416         }
417       }
418       ncolors = nc + nc*(col-1);
419       ierr = ISColoringCreate(comm,ncolors,nc*nx,colors,&dd->localcoloring);CHKERRQ(ierr);
420     }
421     *coloring = dd->localcoloring;
422   } else if (ctype == IS_COLORING_GHOSTED) {
423     if (!dd->ghostedcoloring) {
424       ierr = PetscMalloc(nc*gnx*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
425       i1 = 0;
426       for (i=gxs; i<gxs+gnx; i++) {
427         for (l=0; l<nc; l++) {
428           /* the complicated stuff is to handle periodic boundaries */
429           colors[i1++] = l + nc*(SetInRange(i,m) % col);
430         }
431       }
432       ncolors = nc + nc*(col-1);
433       ierr = ISColoringCreate(comm,ncolors,nc*gnx,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
434       ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
435     }
436     *coloring = dd->ghostedcoloring;
437   } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
438   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
439   PetscFunctionReturn(0);
440 }
441 
442 #undef __FUNCT__
443 #define __FUNCT__ "DMGetColoring_DA_2d_5pt_MPIAIJ"
444 PetscErrorCode DMGetColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
445 {
446   PetscErrorCode    ierr;
447   PetscInt          xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
448   PetscInt          ncolors;
449   MPI_Comm          comm;
450   DMDAPeriodicType  wrap;
451   ISColoringValue   *colors;
452   DM_DA             *dd = (DM_DA*)da->data;
453 
454   PetscFunctionBegin;
455   /*
456          nc - number of components per grid point
457          col - number of colors needed in one direction for single component problem
458 
459   */
460   ierr   = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&wrap,0);CHKERRQ(ierr);
461   ierr   = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
462   ierr   = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
463   ierr   = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
464 
465   if (DMDAXPeriodic(wrap) && (m % 5)){
466     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
467                  by 5\n");
468   }
469   if (DMDAYPeriodic(wrap) && (n % 5)){
470     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
471                  by 5\n");
472   }
473 
474   /* create the coloring */
475   if (ctype == IS_COLORING_GLOBAL) {
476     if (!dd->localcoloring) {
477       ierr = PetscMalloc(nc*nx*ny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
478       ii = 0;
479       for (j=ys; j<ys+ny; j++) {
480 	for (i=xs; i<xs+nx; i++) {
481 	  for (k=0; k<nc; k++) {
482 	    colors[ii++] = k + nc*((3*j+i) % 5);
483 	  }
484 	}
485       }
486       ncolors = 5*nc;
487       ierr = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,&dd->localcoloring);CHKERRQ(ierr);
488     }
489     *coloring = dd->localcoloring;
490   } else if (ctype == IS_COLORING_GHOSTED) {
491     if (!dd->ghostedcoloring) {
492       ierr = PetscMalloc(nc*gnx*gny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
493       ii = 0;
494       for (j=gys; j<gys+gny; j++) {
495 	for (i=gxs; i<gxs+gnx; i++) {
496 	  for (k=0; k<nc; k++) {
497 	    colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
498 	  }
499 	}
500       }
501       ncolors = 5*nc;
502       ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
503       ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
504     }
505     *coloring = dd->ghostedcoloring;
506   } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
507   PetscFunctionReturn(0);
508 }
509 
510 /* =========================================================================== */
511 EXTERN PetscErrorCode DMGetMatrix_DA_1d_MPIAIJ(DM,Mat);
512 EXTERN PetscErrorCode DMGetMatrix_DA_2d_MPIAIJ(DM,Mat);
513 EXTERN PetscErrorCode DMGetMatrix_DA_2d_MPIAIJ_Fill(DM,Mat);
514 EXTERN PetscErrorCode DMGetMatrix_DA_3d_MPIAIJ(DM,Mat);
515 EXTERN PetscErrorCode DMGetMatrix_DA_3d_MPIAIJ_Fill(DM,Mat);
516 EXTERN PetscErrorCode DMGetMatrix_DA_2d_MPIBAIJ(DM,Mat);
517 EXTERN PetscErrorCode DMGetMatrix_DA_3d_MPIBAIJ(DM,Mat);
518 EXTERN PetscErrorCode DMGetMatrix_DA_2d_MPISBAIJ(DM,Mat);
519 EXTERN PetscErrorCode DMGetMatrix_DA_3d_MPISBAIJ(DM,Mat);
520 
521 #undef __FUNCT__
522 #define __FUNCT__ "MatSetDA"
523 /*@
524    MatSetDA - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix
525 
526    Logically Collective on Mat
527 
528    Input Parameters:
529 +  mat - the matrix
530 -  da - the da
531 
532    Level: intermediate
533 
534 @*/
535 PetscErrorCode PETSCDM_DLLEXPORT MatSetDA(Mat mat,DM da)
536 {
537   PetscErrorCode ierr;
538 
539   PetscFunctionBegin;
540   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
541   PetscValidHeaderSpecific(da,DM_CLASSID,1);
542   ierr = PetscTryMethod(mat,"MatSetDA_C",(Mat,DM),(mat,da));CHKERRQ(ierr);
543   PetscFunctionReturn(0);
544 }
545 
546 EXTERN_C_BEGIN
547 #undef __FUNCT__
548 #define __FUNCT__ "MatView_MPI_DA"
549 PetscErrorCode PETSCDM_DLLEXPORT MatView_MPI_DA(Mat A,PetscViewer viewer)
550 {
551   DM             da;
552   PetscErrorCode ierr;
553   const char     *prefix;
554   Mat            Anatural;
555   AO             ao;
556   PetscInt       rstart,rend,*petsc,i;
557   IS             is;
558   MPI_Comm       comm;
559 
560   PetscFunctionBegin;
561   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
562   ierr = PetscObjectQuery((PetscObject)A,"DMDA",(PetscObject*)&da);CHKERRQ(ierr);
563   if (!da) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
564 
565   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
566   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
567   ierr = PetscMalloc((rend-rstart)*sizeof(PetscInt),&petsc);CHKERRQ(ierr);
568   for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
569   ierr = AOApplicationToPetsc(ao,rend-rstart,petsc);CHKERRQ(ierr);
570   ierr = ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
571 
572   /* call viewer on natural ordering */
573   ierr = MatGetSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);CHKERRQ(ierr);
574   ierr = ISDestroy(is);CHKERRQ(ierr);
575   ierr = PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);CHKERRQ(ierr);
576   ierr = PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);CHKERRQ(ierr);
577   ierr = PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);CHKERRQ(ierr);
578   ierr = MatView(Anatural,viewer);CHKERRQ(ierr);
579   ierr = MatDestroy(Anatural);CHKERRQ(ierr);
580   PetscFunctionReturn(0);
581 }
582 EXTERN_C_END
583 
584 EXTERN_C_BEGIN
585 #undef __FUNCT__
586 #define __FUNCT__ "MatLoad_MPI_DA"
587 PetscErrorCode PETSCDM_DLLEXPORT MatLoad_MPI_DA(Mat A,PetscViewer viewer)
588 {
589   DM             da;
590   PetscErrorCode ierr;
591   Mat            Anatural,Aapp;
592   AO             ao;
593   PetscInt       rstart,rend,*app,i;
594   IS             is;
595   MPI_Comm       comm;
596 
597   PetscFunctionBegin;
598   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
599   ierr = PetscObjectQuery((PetscObject)A,"DMDA",(PetscObject*)&da);CHKERRQ(ierr);
600   if (!da) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
601 
602   /* Load the matrix in natural ordering */
603   ierr = MatCreate(((PetscObject)A)->comm,&Anatural);CHKERRQ(ierr);
604   ierr = MatSetType(Anatural,((PetscObject)A)->type_name);CHKERRQ(ierr);
605   ierr = MatSetSizes(Anatural,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
606   ierr = MatLoad(Anatural,viewer);CHKERRQ(ierr);
607 
608   /* Map natural ordering to application ordering and create IS */
609   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
610   ierr = MatGetOwnershipRange(Anatural,&rstart,&rend);CHKERRQ(ierr);
611   ierr = PetscMalloc((rend-rstart)*sizeof(PetscInt),&app);CHKERRQ(ierr);
612   for (i=rstart; i<rend; i++) app[i-rstart] = i;
613   ierr = AOPetscToApplication(ao,rend-rstart,app);CHKERRQ(ierr);
614   ierr = ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
615 
616   /* Do permutation and replace header */
617   ierr = MatGetSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);CHKERRQ(ierr);
618   ierr = MatHeaderReplace(A,Aapp);CHKERRQ(ierr);
619   ierr = ISDestroy(is);CHKERRQ(ierr);
620   ierr = MatDestroy(Anatural);CHKERRQ(ierr);
621   PetscFunctionReturn(0);
622 }
623 EXTERN_C_END
624 
625 #undef __FUNCT__
626 #define __FUNCT__ "DMGetMatrix_DA"
627 PetscErrorCode PETSCDM_DLLEXPORT DMGetMatrix_DA(DM da, const MatType mtype,Mat *J)
628 {
629   PetscErrorCode ierr;
630   PetscInt       dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
631   Mat            A;
632   MPI_Comm       comm;
633   const MatType  Atype;
634   void           (*aij)(void)=PETSC_NULL,(*baij)(void)=PETSC_NULL,(*sbaij)(void)=PETSC_NULL;
635   MatType        ttype[256];
636   PetscBool      flg;
637   PetscMPIInt    size;
638   DM_DA          *dd = (DM_DA*)da->data;
639 
640   PetscFunctionBegin;
641 #ifndef PETSC_USE_DYNAMIC_LIBRARIES
642   ierr = MatInitializePackage(PETSC_NULL);CHKERRQ(ierr);
643 #endif
644   ierr = PetscStrcpy((char*)ttype,mtype);CHKERRQ(ierr);
645   ierr = PetscOptionsBegin(((PetscObject)da)->comm,((PetscObject)da)->prefix,"DMDA options","Mat");CHKERRQ(ierr);
646   ierr = PetscOptionsList("-da_mat_type","Matrix type","MatSetType",MatList,mtype,(char*)ttype,256,&flg);CHKERRQ(ierr);
647   ierr = PetscOptionsEnd();
648 
649   /*
650                                   m
651           ------------------------------------------------------
652          |                                                     |
653          |                                                     |
654          |               ----------------------                |
655          |               |                    |                |
656       n  |           ny  |                    |                |
657          |               |                    |                |
658          |               .---------------------                |
659          |             (xs,ys)     nx                          |
660          |            .                                        |
661          |         (gxs,gys)                                   |
662          |                                                     |
663           -----------------------------------------------------
664   */
665 
666   /*
667          nc - number of components per grid point
668          col - number of colors needed in one direction for single component problem
669 
670   */
671   ierr = DMDAGetInfo(da,&dim,&M,&N,&P,0,0,0,&dof,0,0,0);CHKERRQ(ierr);
672   ierr = DMDAGetCorners(da,0,0,0,&nx,&ny,&nz);CHKERRQ(ierr);
673   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
674   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
675   ierr = MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);CHKERRQ(ierr);
676   ierr = MatSetType(A,(const MatType)ttype);CHKERRQ(ierr);
677   ierr = MatSetDA(A,da);CHKERRQ(ierr);
678   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
679   ierr = MatGetType(A,&Atype);CHKERRQ(ierr);
680   /*
681      We do not provide a getmatrix function in the DMDA operations because
682    the basic DMDA does not know about matrices. We think of DMDA as being more
683    more low-level than matrices. This is kind of cheating but, cause sometimes
684    we think of DMDA has higher level than matrices.
685 
686      We could switch based on Atype (or mtype), but we do not since the
687    specialized setting routines depend only the particular preallocation
688    details of the matrix, not the type itself.
689   */
690   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
691   if (!aij) {
692     ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
693   }
694   if (!aij) {
695     ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
696     if (!baij) {
697       ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
698     }
699     if (!baij){
700       ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
701       if (!sbaij) {
702         ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
703       }
704     }
705   }
706   if (aij) {
707     if (dim == 1) {
708       ierr = DMGetMatrix_DA_1d_MPIAIJ(da,A);CHKERRQ(ierr);
709     } else if (dim == 2) {
710       if (dd->ofill) {
711         ierr = DMGetMatrix_DA_2d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
712       } else {
713         ierr = DMGetMatrix_DA_2d_MPIAIJ(da,A);CHKERRQ(ierr);
714       }
715     } else if (dim == 3) {
716       if (dd->ofill) {
717         ierr = DMGetMatrix_DA_3d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
718       } else {
719         ierr = DMGetMatrix_DA_3d_MPIAIJ(da,A);CHKERRQ(ierr);
720       }
721     }
722   } else if (baij) {
723     if (dim == 2) {
724       ierr = DMGetMatrix_DA_2d_MPIBAIJ(da,A);CHKERRQ(ierr);
725     } else if (dim == 3) {
726       ierr = DMGetMatrix_DA_3d_MPIBAIJ(da,A);CHKERRQ(ierr);
727     } else {
728       SETERRQ3(((PetscObject)da)->comm,PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension!\n" \
729 	       "Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
730     }
731   } else if (sbaij) {
732     if (dim == 2) {
733       ierr = DMGetMatrix_DA_2d_MPISBAIJ(da,A);CHKERRQ(ierr);
734     } else if (dim == 3) {
735       ierr = DMGetMatrix_DA_3d_MPISBAIJ(da,A);CHKERRQ(ierr);
736     } else {
737       SETERRQ3(((PetscObject)da)->comm,PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension!\n" \
738 	       "Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
739     }
740   }
741   ierr = DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);CHKERRQ(ierr);
742   ierr = MatSetStencil(A,dim,dims,starts,dof);CHKERRQ(ierr);
743   ierr = PetscObjectCompose((PetscObject)A,"DMDA",(PetscObject)da);CHKERRQ(ierr);
744   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
745   if (size > 1) {
746     /* change viewer to display matrix in natural ordering */
747     ierr = MatShellSetOperation(A, MATOP_VIEW, (void (*)(void)) MatView_MPI_DA);CHKERRQ(ierr);
748     ierr = MatShellSetOperation(A, MATOP_LOAD, (void (*)(void)) MatLoad_MPI_DA);CHKERRQ(ierr);
749   }
750   *J = A;
751   PetscFunctionReturn(0);
752 }
753 
754 /* ---------------------------------------------------------------------------------*/
755 #undef __FUNCT__
756 #define __FUNCT__ "DMGetMatrix_DA_2d_MPIAIJ"
757 PetscErrorCode DMGetMatrix_DA_2d_MPIAIJ(DM da,Mat J)
758 {
759   PetscErrorCode         ierr;
760   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = PETSC_NULL,k,nc,*rows = PETSC_NULL,col,cnt,l,p;
761   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
762   MPI_Comm               comm;
763   PetscScalar            *values;
764   DMDAPeriodicType       wrap;
765   ISLocalToGlobalMapping ltog,ltogb;
766   DMDAStencilType        st;
767   DM_DA                  *dd = (DM_DA*)da->data;
768 
769   PetscFunctionBegin;
770   /*
771          nc - number of components per grid point
772          col - number of colors needed in one direction for single component problem
773 
774   */
775   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&wrap,&st);CHKERRQ(ierr);
776   col = 2*s + 1;
777   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
778   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
779   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
780 
781   ierr = PetscMalloc2(nc,PetscInt,&rows,col*col*nc*nc,PetscInt,&cols);CHKERRQ(ierr);
782   ierr = DMDAGetISLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
783   ierr = DMDAGetISLocalToGlobalMappingBlck(da,&ltogb);CHKERRQ(ierr);
784 
785   /* determine the matrix preallocation information */
786   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
787   for (i=xs; i<xs+nx; i++) {
788 
789     pstart = DMDAXPeriodic(wrap) ? -s : (PetscMax(-s,-i));
790     pend   = DMDAXPeriodic(wrap) ?  s : (PetscMin(s,m-i-1));
791 
792     for (j=ys; j<ys+ny; j++) {
793       slot = i - gxs + gnx*(j - gys);
794 
795       lstart = DMDAYPeriodic(wrap) ? -s : (PetscMax(-s,-j));
796       lend   = DMDAYPeriodic(wrap) ?  s : (PetscMin(s,n-j-1));
797 
798       cnt  = 0;
799       for (k=0; k<nc; k++) {
800 	for (l=lstart; l<lend+1; l++) {
801 	  for (p=pstart; p<pend+1; p++) {
802 	    if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
803 	      cols[cnt++]  = k + nc*(slot + gnx*l + p);
804 	    }
805 	  }
806 	}
807 	rows[k] = k + nc*(slot);
808       }
809       ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
810     }
811   }
812   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
813   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
814   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
815   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
816 
817   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
818   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
819 
820   /*
821     For each node in the grid: we get the neighbors in the local (on processor ordering
822     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
823     PETSc ordering.
824   */
825   if (!dd->prealloc_only) {
826     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
827     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
828     for (i=xs; i<xs+nx; i++) {
829 
830       pstart = DMDAXPeriodic(wrap) ? -s : (PetscMax(-s,-i));
831       pend   = DMDAXPeriodic(wrap) ?  s : (PetscMin(s,m-i-1));
832 
833       for (j=ys; j<ys+ny; j++) {
834 	slot = i - gxs + gnx*(j - gys);
835 
836 	lstart = DMDAYPeriodic(wrap) ? -s : (PetscMax(-s,-j));
837 	lend   = DMDAYPeriodic(wrap) ?  s : (PetscMin(s,n-j-1));
838 
839 	cnt  = 0;
840 	for (k=0; k<nc; k++) {
841 	  for (l=lstart; l<lend+1; l++) {
842 	    for (p=pstart; p<pend+1; p++) {
843 	      if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
844 		cols[cnt++]  = k + nc*(slot + gnx*l + p);
845 	      }
846 	    }
847 	  }
848 	  rows[k]      = k + nc*(slot);
849 	}
850 	ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
851       }
852     }
853     ierr = PetscFree(values);CHKERRQ(ierr);
854     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
855     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
856   }
857   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
858   PetscFunctionReturn(0);
859 }
860 
861 #undef __FUNCT__
862 #define __FUNCT__ "DMGetMatrix_DA_2d_MPIAIJ_Fill"
863 PetscErrorCode DMGetMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)
864 {
865   PetscErrorCode         ierr;
866   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
867   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,l,p;
868   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
869   DM_DA                  *dd = (DM_DA*)da->data;
870   PetscInt               ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
871   MPI_Comm               comm;
872   PetscScalar            *values;
873   DMDAPeriodicType       wrap;
874   ISLocalToGlobalMapping ltog,ltogb;
875   DMDAStencilType        st;
876 
877   PetscFunctionBegin;
878   /*
879          nc - number of components per grid point
880          col - number of colors needed in one direction for single component problem
881 
882   */
883   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&wrap,&st);CHKERRQ(ierr);
884   col = 2*s + 1;
885   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
886   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
887   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
888 
889   ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
890   ierr = DMDAGetISLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
891   ierr = DMDAGetISLocalToGlobalMappingBlck(da,&ltogb);CHKERRQ(ierr);
892 
893   /* determine the matrix preallocation information */
894   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
895   for (i=xs; i<xs+nx; i++) {
896 
897     pstart = DMDAXPeriodic(wrap) ? -s : (PetscMax(-s,-i));
898     pend   = DMDAXPeriodic(wrap) ?  s : (PetscMin(s,m-i-1));
899 
900     for (j=ys; j<ys+ny; j++) {
901       slot = i - gxs + gnx*(j - gys);
902 
903       lstart = DMDAYPeriodic(wrap) ? -s : (PetscMax(-s,-j));
904       lend   = DMDAYPeriodic(wrap) ?  s : (PetscMin(s,n-j-1));
905 
906       for (k=0; k<nc; k++) {
907         cnt  = 0;
908 	for (l=lstart; l<lend+1; l++) {
909 	  for (p=pstart; p<pend+1; p++) {
910             if (l || p) {
911 	      if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
912                 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++)
913 		  cols[cnt++]  = ofill[ifill_col] + nc*(slot + gnx*l + p);
914 	      }
915             } else {
916 	      if (dfill) {
917 		for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++)
918 		  cols[cnt++]  = dfill[ifill_col] + nc*(slot + gnx*l + p);
919 	      } else {
920 		for (ifill_col=0; ifill_col<nc; ifill_col++)
921 		  cols[cnt++]  = ifill_col + nc*(slot + gnx*l + p);
922 	      }
923             }
924 	  }
925 	}
926 	row = k + nc*(slot);
927         ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
928       }
929     }
930   }
931   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
932   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
933   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
934   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
935   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
936 
937   /*
938     For each node in the grid: we get the neighbors in the local (on processor ordering
939     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
940     PETSc ordering.
941   */
942   if (!dd->prealloc_only) {
943     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
944     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
945     for (i=xs; i<xs+nx; i++) {
946 
947       pstart = DMDAXPeriodic(wrap) ? -s : (PetscMax(-s,-i));
948       pend   = DMDAXPeriodic(wrap) ?  s : (PetscMin(s,m-i-1));
949 
950       for (j=ys; j<ys+ny; j++) {
951 	slot = i - gxs + gnx*(j - gys);
952 
953 	lstart = DMDAYPeriodic(wrap) ? -s : (PetscMax(-s,-j));
954 	lend   = DMDAYPeriodic(wrap) ?  s : (PetscMin(s,n-j-1));
955 
956 	for (k=0; k<nc; k++) {
957 	  cnt  = 0;
958 	  for (l=lstart; l<lend+1; l++) {
959 	    for (p=pstart; p<pend+1; p++) {
960 	      if (l || p) {
961 		if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
962 		  for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++)
963 		    cols[cnt++]  = ofill[ifill_col] + nc*(slot + gnx*l + p);
964 		}
965 	      } else {
966 		if (dfill) {
967 		  for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++)
968 		    cols[cnt++]  = dfill[ifill_col] + nc*(slot + gnx*l + p);
969 		} else {
970 		  for (ifill_col=0; ifill_col<nc; ifill_col++)
971 		    cols[cnt++]  = ifill_col + nc*(slot + gnx*l + p);
972 		}
973 	      }
974 	    }
975 	  }
976 	  row  = k + nc*(slot);
977 	  ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
978 	}
979       }
980     }
981     ierr = PetscFree(values);CHKERRQ(ierr);
982     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
983     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
984   }
985   ierr = PetscFree(cols);CHKERRQ(ierr);
986   PetscFunctionReturn(0);
987 }
988 
989 /* ---------------------------------------------------------------------------------*/
990 
991 #undef __FUNCT__
992 #define __FUNCT__ "DMGetMatrix_DA_3d_MPIAIJ"
993 PetscErrorCode DMGetMatrix_DA_3d_MPIAIJ(DM da,Mat J)
994 {
995   PetscErrorCode         ierr;
996   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
997   PetscInt               m,n,dim,s,*cols = PETSC_NULL,k,nc,*rows = PETSC_NULL,col,cnt,l,p,*dnz = PETSC_NULL,*onz = PETSC_NULL;
998   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
999   MPI_Comm               comm;
1000   PetscScalar            *values;
1001   DMDAPeriodicType       wrap;
1002   ISLocalToGlobalMapping ltog,ltogb;
1003   DMDAStencilType        st;
1004   DM_DA                  *dd = (DM_DA*)da->data;
1005 
1006   PetscFunctionBegin;
1007   /*
1008          nc - number of components per grid point
1009          col - number of colors needed in one direction for single component problem
1010 
1011   */
1012   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&wrap,&st);CHKERRQ(ierr);
1013   col    = 2*s + 1;
1014 
1015   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1016   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1017   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1018 
1019   ierr = PetscMalloc2(nc,PetscInt,&rows,col*col*col*nc*nc,PetscInt,&cols);CHKERRQ(ierr);
1020   ierr = DMDAGetISLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1021   ierr = DMDAGetISLocalToGlobalMappingBlck(da,&ltogb);CHKERRQ(ierr);
1022 
1023   /* determine the matrix preallocation information */
1024   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1025   for (i=xs; i<xs+nx; i++) {
1026     istart = DMDAXPeriodic(wrap) ? -s : (PetscMax(-s,-i));
1027     iend   = DMDAXPeriodic(wrap) ?  s : (PetscMin(s,m-i-1));
1028     for (j=ys; j<ys+ny; j++) {
1029       jstart = DMDAYPeriodic(wrap) ? -s : (PetscMax(-s,-j));
1030       jend   = DMDAYPeriodic(wrap) ?  s : (PetscMin(s,n-j-1));
1031       for (k=zs; k<zs+nz; k++) {
1032 	kstart = DMDAZPeriodic(wrap) ? -s : (PetscMax(-s,-k));
1033 	kend   = DMDAZPeriodic(wrap) ?  s : (PetscMin(s,p-k-1));
1034 
1035 	slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1036 
1037 	cnt  = 0;
1038 	for (l=0; l<nc; l++) {
1039 	  for (ii=istart; ii<iend+1; ii++) {
1040 	    for (jj=jstart; jj<jend+1; jj++) {
1041 	      for (kk=kstart; kk<kend+1; kk++) {
1042 		if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1043 		  cols[cnt++]  = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1044 		}
1045 	      }
1046 	    }
1047 	  }
1048 	  rows[l] = l + nc*(slot);
1049 	}
1050 	ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1051       }
1052     }
1053   }
1054   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1055   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1056   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1057   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1058   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1059   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1060 
1061   /*
1062     For each node in the grid: we get the neighbors in the local (on processor ordering
1063     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1064     PETSc ordering.
1065   */
1066   if (!dd->prealloc_only) {
1067     ierr = PetscMalloc(col*col*col*nc*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1068     ierr = PetscMemzero(values,col*col*col*nc*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1069     for (i=xs; i<xs+nx; i++) {
1070       istart = DMDAXPeriodic(wrap) ? -s : (PetscMax(-s,-i));
1071       iend   = DMDAXPeriodic(wrap) ?  s : (PetscMin(s,m-i-1));
1072       for (j=ys; j<ys+ny; j++) {
1073 	jstart = DMDAYPeriodic(wrap) ? -s : (PetscMax(-s,-j));
1074 	jend   = DMDAYPeriodic(wrap) ?  s : (PetscMin(s,n-j-1));
1075 	for (k=zs; k<zs+nz; k++) {
1076 	  kstart = DMDAZPeriodic(wrap) ? -s : (PetscMax(-s,-k));
1077 	  kend   = DMDAZPeriodic(wrap) ?  s : (PetscMin(s,p-k-1));
1078 
1079 	  slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1080 
1081 	  cnt  = 0;
1082 	  for (l=0; l<nc; l++) {
1083 	    for (ii=istart; ii<iend+1; ii++) {
1084 	      for (jj=jstart; jj<jend+1; jj++) {
1085 		for (kk=kstart; kk<kend+1; kk++) {
1086 		  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1087 		    cols[cnt++]  = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1088 		  }
1089 		}
1090 	      }
1091 	    }
1092 	    rows[l]      = l + nc*(slot);
1093 	  }
1094 	  ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1095 	}
1096       }
1097     }
1098     ierr = PetscFree(values);CHKERRQ(ierr);
1099     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1100     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1101   }
1102   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1103   PetscFunctionReturn(0);
1104 }
1105 
1106 /* ---------------------------------------------------------------------------------*/
1107 
1108 #undef __FUNCT__
1109 #define __FUNCT__ "DMGetMatrix_DA_1d_MPIAIJ"
1110 PetscErrorCode DMGetMatrix_DA_1d_MPIAIJ(DM da,Mat J)
1111 {
1112   PetscErrorCode         ierr;
1113   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1114   PetscInt               m,dim,s,*cols = PETSC_NULL,nc,*rows = PETSC_NULL,col,cnt,l;
1115   PetscInt               istart,iend;
1116   PetscScalar            *values;
1117   DMDAPeriodicType       wrap;
1118   ISLocalToGlobalMapping ltog,ltogb;
1119   DM_DA                  *dd = (DM_DA*)da->data;
1120 
1121   PetscFunctionBegin;
1122   /*
1123          nc - number of components per grid point
1124          col - number of colors needed in one direction for single component problem
1125 
1126   */
1127   ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&wrap,0);CHKERRQ(ierr);
1128   col    = 2*s + 1;
1129 
1130   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
1131   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
1132 
1133   ierr = MatSeqAIJSetPreallocation(J,col*nc,0);CHKERRQ(ierr);
1134   ierr = MatMPIAIJSetPreallocation(J,col*nc,0,col*nc,0);CHKERRQ(ierr);
1135   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1136   ierr = PetscMalloc2(nc,PetscInt,&rows,col*nc*nc,PetscInt,&cols);CHKERRQ(ierr);
1137 
1138   ierr = DMDAGetISLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1139   ierr = DMDAGetISLocalToGlobalMappingBlck(da,&ltogb);CHKERRQ(ierr);
1140   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1141   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1142 
1143   /*
1144     For each node in the grid: we get the neighbors in the local (on processor ordering
1145     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1146     PETSc ordering.
1147   */
1148   if (!dd->prealloc_only) {
1149     ierr = PetscMalloc(col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1150     ierr = PetscMemzero(values,col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1151     for (i=xs; i<xs+nx; i++) {
1152       istart = PetscMax(-s,gxs - i);
1153       iend   = PetscMin(s,gxs + gnx - i - 1);
1154       slot   = i - gxs;
1155 
1156       cnt  = 0;
1157       for (l=0; l<nc; l++) {
1158 	for (i1=istart; i1<iend+1; i1++) {
1159 	  cols[cnt++] = l + nc*(slot + i1);
1160 	}
1161 	rows[l]      = l + nc*(slot);
1162       }
1163       ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1164     }
1165     ierr = PetscFree(values);CHKERRQ(ierr);
1166     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1167     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1168   }
1169   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1170   PetscFunctionReturn(0);
1171 }
1172 
1173 #undef __FUNCT__
1174 #define __FUNCT__ "DMGetMatrix_DA_2d_MPIBAIJ"
1175 PetscErrorCode DMGetMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
1176 {
1177   PetscErrorCode         ierr;
1178   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1179   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1180   PetscInt               istart,iend,jstart,jend,ii,jj;
1181   MPI_Comm               comm;
1182   PetscScalar            *values;
1183   DMDAPeriodicType       wrap;
1184   DMDAStencilType        st;
1185   ISLocalToGlobalMapping ltog,ltogb;
1186   DM_DA                  *dd = (DM_DA*)da->data;
1187 
1188   PetscFunctionBegin;
1189   /*
1190      nc - number of components per grid point
1191      col - number of colors needed in one direction for single component problem
1192   */
1193   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&wrap,&st);CHKERRQ(ierr);
1194   col = 2*s + 1;
1195 
1196   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1197   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1198   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1199 
1200   ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1201 
1202   ierr = DMDAGetISLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1203   ierr = DMDAGetISLocalToGlobalMappingBlck(da,&ltogb);CHKERRQ(ierr);
1204 
1205   /* determine the matrix preallocation information */
1206   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1207   for (i=xs; i<xs+nx; i++) {
1208     istart = DMDAXPeriodic(wrap) ? -s : (PetscMax(-s,-i));
1209     iend   = DMDAXPeriodic(wrap) ?  s : (PetscMin(s,m-i-1));
1210     for (j=ys; j<ys+ny; j++) {
1211       jstart = DMDAYPeriodic(wrap) ? -s : (PetscMax(-s,-j));
1212       jend   = DMDAYPeriodic(wrap) ?  s : (PetscMin(s,n-j-1));
1213       slot = i - gxs + gnx*(j - gys);
1214 
1215       /* Find block columns in block row */
1216       cnt  = 0;
1217       for (ii=istart; ii<iend+1; ii++) {
1218         for (jj=jstart; jj<jend+1; jj++) {
1219           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1220             cols[cnt++]  = slot + ii + gnx*jj;
1221           }
1222         }
1223       }
1224       ierr = MatPreallocateSetLocal(ltogb,1,&slot,ltogb,cnt,cols,dnz,onz);CHKERRQ(ierr);
1225     }
1226   }
1227   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1228   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1229   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1230 
1231   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1232   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1233 
1234   /*
1235     For each node in the grid: we get the neighbors in the local (on processor ordering
1236     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1237     PETSc ordering.
1238   */
1239   if (!dd->prealloc_only) {
1240     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1241     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1242     for (i=xs; i<xs+nx; i++) {
1243       istart = DMDAXPeriodic(wrap) ? -s : (PetscMax(-s,-i));
1244       iend   = DMDAXPeriodic(wrap) ?  s : (PetscMin(s,m-i-1));
1245       for (j=ys; j<ys+ny; j++) {
1246 	jstart = DMDAYPeriodic(wrap) ? -s : (PetscMax(-s,-j));
1247 	jend   = DMDAYPeriodic(wrap) ?  s : (PetscMin(s,n-j-1));
1248 	slot = i - gxs + gnx*(j - gys);
1249 	cnt  = 0;
1250         for (ii=istart; ii<iend+1; ii++) {
1251           for (jj=jstart; jj<jend+1; jj++) {
1252             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1253               cols[cnt++]  = slot + ii + gnx*jj;
1254             }
1255           }
1256         }
1257 	ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1258       }
1259     }
1260     ierr = PetscFree(values);CHKERRQ(ierr);
1261     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1262     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1263   }
1264   ierr = PetscFree(cols);CHKERRQ(ierr);
1265   PetscFunctionReturn(0);
1266 }
1267 
1268 #undef __FUNCT__
1269 #define __FUNCT__ "DMGetMatrix_DA_3d_MPIBAIJ"
1270 PetscErrorCode DMGetMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
1271 {
1272   PetscErrorCode         ierr;
1273   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1274   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1275   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1276   MPI_Comm               comm;
1277   PetscScalar            *values;
1278   DMDAPeriodicType       wrap;
1279   DMDAStencilType        st;
1280   ISLocalToGlobalMapping ltog,ltogb;
1281   DM_DA                  *dd = (DM_DA*)da->data;
1282 
1283   PetscFunctionBegin;
1284   /*
1285          nc - number of components per grid point
1286          col - number of colors needed in one direction for single component problem
1287 
1288   */
1289   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&wrap,&st);CHKERRQ(ierr);
1290   col    = 2*s + 1;
1291 
1292   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1293   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1294   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1295 
1296   ierr  = PetscMalloc(col*col*col*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1297 
1298   ierr = DMDAGetISLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1299   ierr = DMDAGetISLocalToGlobalMappingBlck(da,&ltogb);CHKERRQ(ierr);
1300 
1301   /* determine the matrix preallocation information */
1302   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1303   for (i=xs; i<xs+nx; i++) {
1304     istart = DMDAXPeriodic(wrap) ? -s : (PetscMax(-s,-i));
1305     iend   = DMDAXPeriodic(wrap) ?  s : (PetscMin(s,m-i-1));
1306     for (j=ys; j<ys+ny; j++) {
1307       jstart = DMDAYPeriodic(wrap) ? -s : (PetscMax(-s,-j));
1308       jend   = DMDAYPeriodic(wrap) ?  s : (PetscMin(s,n-j-1));
1309       for (k=zs; k<zs+nz; k++) {
1310 	kstart = DMDAZPeriodic(wrap) ? -s : (PetscMax(-s,-k));
1311 	kend   = DMDAZPeriodic(wrap) ?  s : (PetscMin(s,p-k-1));
1312 
1313 	slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1314 
1315 	/* Find block columns in block row */
1316 	cnt  = 0;
1317         for (ii=istart; ii<iend+1; ii++) {
1318           for (jj=jstart; jj<jend+1; jj++) {
1319             for (kk=kstart; kk<kend+1; kk++) {
1320               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1321 		cols[cnt++]  = slot + ii + gnx*jj + gnx*gny*kk;
1322 	      }
1323 	    }
1324 	  }
1325 	}
1326 	ierr = MatPreallocateSetLocal(ltogb,1,&slot,ltogb,cnt,cols,dnz,onz);CHKERRQ(ierr);
1327       }
1328     }
1329   }
1330   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1331   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1332   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1333 
1334   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1335   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1336 
1337   /*
1338     For each node in the grid: we get the neighbors in the local (on processor ordering
1339     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1340     PETSc ordering.
1341   */
1342   if (!dd->prealloc_only) {
1343     ierr  = PetscMalloc(col*col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1344     ierr  = PetscMemzero(values,col*col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1345     for (i=xs; i<xs+nx; i++) {
1346       istart = DMDAXPeriodic(wrap) ? -s : (PetscMax(-s,-i));
1347       iend   = DMDAXPeriodic(wrap) ?  s : (PetscMin(s,m-i-1));
1348       for (j=ys; j<ys+ny; j++) {
1349 	jstart = DMDAYPeriodic(wrap) ? -s : (PetscMax(-s,-j));
1350 	jend   = DMDAYPeriodic(wrap) ?  s : (PetscMin(s,n-j-1));
1351 	for (k=zs; k<zs+nz; k++) {
1352 	  kstart = DMDAZPeriodic(wrap) ? -s : (PetscMax(-s,-k));
1353 	  kend   = DMDAZPeriodic(wrap) ?  s : (PetscMin(s,p-k-1));
1354 
1355 	  slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1356 
1357 	  cnt  = 0;
1358           for (ii=istart; ii<iend+1; ii++) {
1359             for (jj=jstart; jj<jend+1; jj++) {
1360               for (kk=kstart; kk<kend+1; kk++) {
1361                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1362                   cols[cnt++]  = slot + ii + gnx*jj + gnx*gny*kk;
1363                 }
1364               }
1365             }
1366           }
1367 	  ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1368 	}
1369       }
1370     }
1371     ierr = PetscFree(values);CHKERRQ(ierr);
1372     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1373     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1374   }
1375   ierr = PetscFree(cols);CHKERRQ(ierr);
1376   PetscFunctionReturn(0);
1377 }
1378 
1379 #undef __FUNCT__
1380 #define __FUNCT__ "L2GFilterUpperTriangular"
1381 /*
1382   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
1383   identify in the local ordering with periodic domain.
1384 */
1385 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[])
1386 {
1387   PetscErrorCode ierr;
1388   PetscInt       i,n;
1389 
1390   PetscFunctionBegin;
1391   ierr = ISLocalToGlobalMappingApply(ltog,1,row,row);CHKERRQ(ierr);
1392   ierr = ISLocalToGlobalMappingApply(ltog,*cnt,col,col);CHKERRQ(ierr);
1393   for (i=0,n=0; i<*cnt; i++) {
1394     if (col[i] >= *row) col[n++] = col[i];
1395   }
1396   *cnt = n;
1397   PetscFunctionReturn(0);
1398 }
1399 
1400 #undef __FUNCT__
1401 #define __FUNCT__ "DMGetMatrix_DA_2d_MPISBAIJ"
1402 PetscErrorCode DMGetMatrix_DA_2d_MPISBAIJ(DM da,Mat J)
1403 {
1404   PetscErrorCode         ierr;
1405   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1406   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1407   PetscInt               istart,iend,jstart,jend,ii,jj;
1408   MPI_Comm               comm;
1409   PetscScalar            *values;
1410   DMDAPeriodicType       wrap;
1411   DMDAStencilType        st;
1412   ISLocalToGlobalMapping ltog,ltogb;
1413   DM_DA                  *dd = (DM_DA*)da->data;
1414 
1415   PetscFunctionBegin;
1416   /*
1417      nc - number of components per grid point
1418      col - number of colors needed in one direction for single component problem
1419   */
1420   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&wrap,&st);CHKERRQ(ierr);
1421   col = 2*s + 1;
1422 
1423   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1424   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1425   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1426 
1427   ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1428 
1429   ierr = DMDAGetISLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1430   ierr = DMDAGetISLocalToGlobalMappingBlck(da,&ltogb);CHKERRQ(ierr);
1431 
1432   /* determine the matrix preallocation information */
1433   ierr = MatPreallocateSymmetricInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1434   for (i=xs; i<xs+nx; i++) {
1435     istart = DMDAXPeriodic(wrap) ? -s : (PetscMax(-s,-i));
1436     iend   = DMDAXPeriodic(wrap) ?  s : (PetscMin(s,m-i-1));
1437     for (j=ys; j<ys+ny; j++) {
1438       jstart = DMDAYPeriodic(wrap) ? -s : (PetscMax(-s,-j));
1439       jend   = DMDAYPeriodic(wrap) ?  s : (PetscMin(s,n-j-1));
1440       slot = i - gxs + gnx*(j - gys);
1441 
1442       /* Find block columns in block row */
1443       cnt  = 0;
1444       for (ii=istart; ii<iend+1; ii++) {
1445         for (jj=jstart; jj<jend+1; jj++) {
1446           if (st == DMDA_STENCIL_BOX || !ii || !jj) {
1447             cols[cnt++]  = slot + ii + gnx*jj;
1448           }
1449         }
1450       }
1451       ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1452       ierr = MatPreallocateSymmetricSet(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
1453     }
1454   }
1455   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1456   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1457   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1458 
1459   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1460   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1461 
1462   /*
1463     For each node in the grid: we get the neighbors in the local (on processor ordering
1464     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1465     PETSc ordering.
1466   */
1467   if (!dd->prealloc_only) {
1468     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1469     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1470     for (i=xs; i<xs+nx; i++) {
1471       istart = DMDAXPeriodic(wrap) ? -s : (PetscMax(-s,-i));
1472       iend   = DMDAXPeriodic(wrap) ?  s : (PetscMin(s,m-i-1));
1473       for (j=ys; j<ys+ny; j++) {
1474         jstart = DMDAYPeriodic(wrap) ? -s : (PetscMax(-s,-j));
1475         jend   = DMDAYPeriodic(wrap) ?  s : (PetscMin(s,n-j-1));
1476         slot = i - gxs + gnx*(j - gys);
1477 
1478         /* Find block columns in block row */
1479         cnt  = 0;
1480         for (ii=istart; ii<iend+1; ii++) {
1481           for (jj=jstart; jj<jend+1; jj++) {
1482             if (st == DMDA_STENCIL_BOX || !ii || !jj) {
1483               cols[cnt++]  = slot + ii + gnx*jj;
1484             }
1485           }
1486         }
1487         ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1488 	ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1489       }
1490     }
1491     ierr = PetscFree(values);CHKERRQ(ierr);
1492     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1493     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1494   }
1495   ierr = PetscFree(cols);CHKERRQ(ierr);
1496   PetscFunctionReturn(0);
1497 }
1498 
1499 #undef __FUNCT__
1500 #define __FUNCT__ "DMGetMatrix_DA_3d_MPISBAIJ"
1501 PetscErrorCode DMGetMatrix_DA_3d_MPISBAIJ(DM da,Mat J)
1502 {
1503   PetscErrorCode         ierr;
1504   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1505   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1506   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1507   MPI_Comm               comm;
1508   PetscScalar            *values;
1509   DMDAPeriodicType       wrap;
1510   DMDAStencilType        st;
1511   ISLocalToGlobalMapping ltog,ltogb;
1512   DM_DA                  *dd = (DM_DA*)da->data;
1513 
1514   PetscFunctionBegin;
1515   /*
1516      nc - number of components per grid point
1517      col - number of colors needed in one direction for single component problem
1518   */
1519   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&wrap,&st);CHKERRQ(ierr);
1520   col = 2*s + 1;
1521 
1522   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1523   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1524   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1525 
1526   /* create the matrix */
1527   ierr = PetscMalloc(col*col*col*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1528 
1529   ierr = DMDAGetISLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1530   ierr = DMDAGetISLocalToGlobalMappingBlck(da,&ltogb);CHKERRQ(ierr);
1531 
1532   /* determine the matrix preallocation information */
1533   ierr = MatPreallocateSymmetricInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1534   for (i=xs; i<xs+nx; i++) {
1535     istart = DMDAXPeriodic(wrap) ? -s : (PetscMax(-s,-i));
1536     iend   = DMDAXPeriodic(wrap) ?  s : (PetscMin(s,m-i-1));
1537     for (j=ys; j<ys+ny; j++) {
1538       jstart = DMDAYPeriodic(wrap) ? -s : (PetscMax(-s,-j));
1539       jend   = DMDAYPeriodic(wrap) ?  s : (PetscMin(s,n-j-1));
1540       for (k=zs; k<zs+nz; k++) {
1541         kstart = DMDAZPeriodic(wrap) ? -s : (PetscMax(-s,-k));
1542 	kend   = DMDAZPeriodic(wrap) ?  s : (PetscMin(s,p-k-1));
1543 
1544 	slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1545 
1546 	/* Find block columns in block row */
1547 	cnt  = 0;
1548         for (ii=istart; ii<iend+1; ii++) {
1549           for (jj=jstart; jj<jend+1; jj++) {
1550             for (kk=kstart; kk<kend+1; kk++) {
1551               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
1552                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1553               }
1554             }
1555           }
1556         }
1557         ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1558         ierr = MatPreallocateSymmetricSet(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
1559       }
1560     }
1561   }
1562   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1563   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1564   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1565 
1566   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1567   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1568 
1569   /*
1570     For each node in the grid: we get the neighbors in the local (on processor ordering
1571     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1572     PETSc ordering.
1573   */
1574   if (!dd->prealloc_only) {
1575     ierr = PetscMalloc(col*col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1576     ierr = PetscMemzero(values,col*col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1577     for (i=xs; i<xs+nx; i++) {
1578       istart = DMDAXPeriodic(wrap) ? -s : (PetscMax(-s,-i));
1579       iend   = DMDAXPeriodic(wrap) ?  s : (PetscMin(s,m-i-1));
1580       for (j=ys; j<ys+ny; j++) {
1581         jstart = DMDAYPeriodic(wrap) ? -s : (PetscMax(-s,-j));
1582 	jend   = DMDAYPeriodic(wrap) ?  s : (PetscMin(s,n-j-1));
1583 	for (k=zs; k<zs+nz; k++) {
1584           kstart = DMDAZPeriodic(wrap) ? -s : (PetscMax(-s,-k));
1585 	  kend   = DMDAZPeriodic(wrap) ?  s : (PetscMin(s,p-k-1));
1586 
1587 	  slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1588 
1589 	  cnt  = 0;
1590           for (ii=istart; ii<iend+1; ii++) {
1591             for (jj=jstart; jj<jend+1; jj++) {
1592               for (kk=kstart; kk<kend+1; kk++) {
1593                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
1594 		  cols[cnt++]  = slot + ii + gnx*jj + gnx*gny*kk;
1595 		}
1596 	      }
1597 	    }
1598 	  }
1599           ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1600           ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1601 	}
1602       }
1603     }
1604     ierr = PetscFree(values);CHKERRQ(ierr);
1605     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1606     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1607   }
1608   ierr = PetscFree(cols);CHKERRQ(ierr);
1609   PetscFunctionReturn(0);
1610 }
1611 
1612 /* ---------------------------------------------------------------------------------*/
1613 
1614 #undef __FUNCT__
1615 #define __FUNCT__ "DMGetMatrix_DA_3d_MPIAIJ_Fill"
1616 PetscErrorCode DMGetMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J)
1617 {
1618   PetscErrorCode         ierr;
1619   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1620   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,l,p,*dnz,*onz;
1621   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1622   DM_DA                  *dd = (DM_DA*)da->data;
1623   PetscInt               ifill_col,*dfill = dd->dfill,*ofill = dd->ofill;
1624   MPI_Comm               comm;
1625   PetscScalar            *values;
1626   DMDAPeriodicType       wrap;
1627   ISLocalToGlobalMapping ltog,ltogb;
1628   DMDAStencilType        st;
1629 
1630   PetscFunctionBegin;
1631   /*
1632          nc - number of components per grid point
1633          col - number of colors needed in one direction for single component problem
1634 
1635   */
1636   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&wrap,&st);CHKERRQ(ierr);
1637   col    = 2*s + 1;
1638   if (DMDAXPeriodic(wrap) && (m % col)){
1639     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
1640                  by 2*stencil_width + 1\n");
1641   }
1642   if (DMDAYPeriodic(wrap) && (n % col)){
1643     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
1644                  by 2*stencil_width + 1\n");
1645   }
1646   if (DMDAZPeriodic(wrap) && (p % col)){
1647     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
1648                  by 2*stencil_width + 1\n");
1649   }
1650 
1651   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1652   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1653   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1654 
1655   ierr = PetscMalloc(col*col*col*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1656   ierr = DMDAGetISLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1657   ierr = DMDAGetISLocalToGlobalMappingBlck(da,&ltogb);CHKERRQ(ierr);
1658 
1659   /* determine the matrix preallocation information */
1660   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1661 
1662 
1663   for (i=xs; i<xs+nx; i++) {
1664     istart = DMDAXPeriodic(wrap) ? -s : (PetscMax(-s,-i));
1665     iend   = DMDAXPeriodic(wrap) ?  s : (PetscMin(s,m-i-1));
1666     for (j=ys; j<ys+ny; j++) {
1667       jstart = DMDAYPeriodic(wrap) ? -s : (PetscMax(-s,-j));
1668       jend   = DMDAYPeriodic(wrap) ?  s : (PetscMin(s,n-j-1));
1669       for (k=zs; k<zs+nz; k++) {
1670         kstart = DMDAZPeriodic(wrap) ? -s : (PetscMax(-s,-k));
1671         kend   = DMDAZPeriodic(wrap) ?  s : (PetscMin(s,p-k-1));
1672 
1673         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1674 
1675 	for (l=0; l<nc; l++) {
1676 	  cnt  = 0;
1677 	  for (ii=istart; ii<iend+1; ii++) {
1678 	    for (jj=jstart; jj<jend+1; jj++) {
1679 	      for (kk=kstart; kk<kend+1; kk++) {
1680 		if (ii || jj || kk) {
1681 		  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1682 		    for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++)
1683 		      cols[cnt++]  = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1684 		  }
1685 		} else {
1686 		  if (dfill) {
1687 		    for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++)
1688 		      cols[cnt++]  = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1689 		  } else {
1690 		    for (ifill_col=0; ifill_col<nc; ifill_col++)
1691 		      cols[cnt++]  = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1692 		  }
1693 		}
1694 	      }
1695 	    }
1696 	  }
1697 	  row  = l + nc*(slot);
1698 	  ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1699 	}
1700       }
1701     }
1702   }
1703   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1704   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1705   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1706   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1707   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1708 
1709   /*
1710     For each node in the grid: we get the neighbors in the local (on processor ordering
1711     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1712     PETSc ordering.
1713   */
1714   if (!dd->prealloc_only) {
1715     ierr = PetscMalloc(col*col*col*nc*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1716     ierr = PetscMemzero(values,col*col*col*nc*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1717     for (i=xs; i<xs+nx; i++) {
1718       istart = DMDAXPeriodic(wrap) ? -s : (PetscMax(-s,-i));
1719       iend   = DMDAXPeriodic(wrap) ?  s : (PetscMin(s,m-i-1));
1720       for (j=ys; j<ys+ny; j++) {
1721 	jstart = DMDAYPeriodic(wrap) ? -s : (PetscMax(-s,-j));
1722 	jend   = DMDAYPeriodic(wrap) ?  s : (PetscMin(s,n-j-1));
1723 	for (k=zs; k<zs+nz; k++) {
1724 	  kstart = DMDAZPeriodic(wrap) ? -s : (PetscMax(-s,-k));
1725 	  kend   = DMDAZPeriodic(wrap) ?  s : (PetscMin(s,p-k-1));
1726 
1727 	  slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1728 
1729 	  for (l=0; l<nc; l++) {
1730 	    cnt  = 0;
1731 	    for (ii=istart; ii<iend+1; ii++) {
1732 	      for (jj=jstart; jj<jend+1; jj++) {
1733 		for (kk=kstart; kk<kend+1; kk++) {
1734 		  if (ii || jj || kk) {
1735 		    if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1736 		      for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++)
1737 			cols[cnt++]  = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1738 		    }
1739 		  } else {
1740 		    if (dfill) {
1741 		      for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++)
1742 			cols[cnt++]  = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1743 		    } else {
1744 		      for (ifill_col=0; ifill_col<nc; ifill_col++)
1745 			cols[cnt++]  = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1746 		    }
1747 		  }
1748 		}
1749 	      }
1750 	    }
1751 	    row  = l + nc*(slot);
1752 	    ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1753 	  }
1754 	}
1755       }
1756     }
1757     ierr = PetscFree(values);CHKERRQ(ierr);
1758     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1759     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1760   }
1761   ierr = PetscFree(cols);CHKERRQ(ierr);
1762   PetscFunctionReturn(0);
1763 }
1764