xref: /petsc/src/dm/impls/da/fdda.c (revision 09573ac72a50d3e7ecd55a2b7f0ef28450cd0a8b)
1 #define PETSCDM_DLL
2 
3 #include "private/daimpl.h" /*I      "petscdm.h"     I*/
4 #include "petscmat.h"         /*I      "petscmat.h"    I*/
5 #include "private/matimpl.h"
6 
7 extern PetscErrorCode DMGetColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring *);
8 extern PetscErrorCode DMGetColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring *);
9 extern PetscErrorCode DMGetColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring *);
10 extern PetscErrorCode DMGetColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring *);
11 
12 /*
13    For ghost i that may be negative or greater than the upper bound this
14   maps it into the 0:m-1 range using periodicity
15 */
16 #define SetInRange(i,m) ((i < 0) ? m+i:((i >= m) ? i-m:i))
17 
18 #undef __FUNCT__
19 #define __FUNCT__ "DMDASetBlockFills_Private"
20 static PetscErrorCode DMDASetBlockFills_Private(PetscInt *dfill,PetscInt w,PetscInt **rfill)
21 {
22   PetscErrorCode ierr;
23   PetscInt       i,j,nz,*fill;
24 
25   PetscFunctionBegin;
26   if (!dfill) PetscFunctionReturn(0);
27 
28   /* count number nonzeros */
29   nz = 0;
30   for (i=0; i<w; i++) {
31     for (j=0; j<w; j++) {
32       if (dfill[w*i+j]) nz++;
33     }
34   }
35   ierr = PetscMalloc((nz + w + 1)*sizeof(PetscInt),&fill);CHKERRQ(ierr);
36   /* construct modified CSR storage of nonzero structure */
37   nz = w + 1;
38   for (i=0; i<w; i++) {
39     fill[i] = nz;
40     for (j=0; j<w; j++) {
41       if (dfill[w*i+j]) {
42 	fill[nz] = j;
43 	nz++;
44       }
45     }
46   }
47   fill[w] = nz;
48 
49   *rfill = fill;
50   PetscFunctionReturn(0);
51 }
52 
53 #undef __FUNCT__
54 #define __FUNCT__ "DMDASetMatPreallocateOnly"
55 /*@
56     DMDASetMatPreallocateOnly - When DMGetMatrix() is called the matrix will be properly
57        preallocated but the nonzero structure and zero values will not be set.
58 
59     Logically Collective on DMDA
60 
61     Input Parameter:
62 +   da - the distributed array
63 -   only - PETSC_TRUE if only want preallocation
64 
65 
66     Level: developer
67 
68 .seealso DMGetMatrix(), DMDASetGetMatrix(), DMDASetBlockSize(), DMDASetBlockFills()
69 
70 @*/
71 PetscErrorCode PETSCDM_DLLEXPORT DMDASetMatPreallocateOnly(DM da,PetscBool  only)
72 {
73   DM_DA *dd = (DM_DA*)da->data;
74 
75   PetscFunctionBegin;
76   dd->prealloc_only = only;
77   PetscFunctionReturn(0);
78 }
79 
80 #undef __FUNCT__
81 #define __FUNCT__ "DMDASetBlockFills"
82 /*@
83     DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
84     of the matrix returned by DMGetMatrix().
85 
86     Logically Collective on DMDA
87 
88     Input Parameter:
89 +   da - the distributed array
90 .   dfill - the fill pattern in the diagonal block (may be PETSC_NULL, means use dense block)
91 -   ofill - the fill pattern in the off-diagonal blocks
92 
93 
94     Level: developer
95 
96     Notes: This only makes sense when you are doing multicomponent problems but using the
97        MPIAIJ matrix format
98 
99            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
100        representing coupling and 0 entries for missing coupling. For example
101 $             dfill[9] = {1, 0, 0,
102 $                         1, 1, 0,
103 $                         0, 1, 1}
104        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
105        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
106        diagonal block).
107 
108      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
109      can be represented in the dfill, ofill format
110 
111    Contributed by Glenn Hammond
112 
113 .seealso DMGetMatrix(), DMDASetGetMatrix(), DMDASetMatPreallocateOnly()
114 
115 @*/
116 PetscErrorCode PETSCDM_DLLEXPORT DMDASetBlockFills(DM da,PetscInt *dfill,PetscInt *ofill)
117 {
118   DM_DA          *dd = (DM_DA*)da->data;
119   PetscErrorCode ierr;
120 
121   PetscFunctionBegin;
122   ierr = DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);CHKERRQ(ierr);
123   ierr = DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);CHKERRQ(ierr);
124   PetscFunctionReturn(0);
125 }
126 
127 
128 #undef __FUNCT__
129 #define __FUNCT__ "DMGetColoring_DA"
130 PetscErrorCode PETSCDM_DLLEXPORT DMGetColoring_DA(DM da,ISColoringType ctype,const MatType mtype,ISColoring *coloring)
131 {
132   PetscErrorCode   ierr;
133   PetscInt         dim,m,n,p,nc;
134   DMDAPeriodicType wrap;
135   MPI_Comm         comm;
136   PetscMPIInt      size;
137   PetscBool        isBAIJ;
138   DM_DA            *dd = (DM_DA*)da->data;
139 
140   PetscFunctionBegin;
141   /*
142                                   m
143           ------------------------------------------------------
144          |                                                     |
145          |                                                     |
146          |               ----------------------                |
147          |               |                    |                |
148       n  |           yn  |                    |                |
149          |               |                    |                |
150          |               .---------------------                |
151          |             (xs,ys)     xn                          |
152          |            .                                        |
153          |         (gxs,gys)                                   |
154          |                                                     |
155           -----------------------------------------------------
156   */
157 
158   /*
159          nc - number of components per grid point
160          col - number of colors needed in one direction for single component problem
161 
162   */
163   ierr = DMDAGetInfo(da,&dim,0,0,0,&m,&n,&p,&nc,0,&wrap,0);CHKERRQ(ierr);
164 
165   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
166   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
167   if (ctype == IS_COLORING_GHOSTED){
168     if (size == 1) {
169       ctype = IS_COLORING_GLOBAL;
170     } else if (dim > 1){
171       if ((m==1 && DMDAXPeriodic(wrap)) || (n==1 && DMDAYPeriodic(wrap)) || (p==1 && DMDAZPeriodic(wrap))){
172         SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"IS_COLORING_GHOSTED cannot be used for periodic boundary condition having both ends of the domain  on the same process");
173       }
174     }
175   }
176 
177   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
178      matrices is for the blocks, not the individual matrix elements  */
179   ierr = PetscStrcmp(mtype,MATBAIJ,&isBAIJ);CHKERRQ(ierr);
180   if (!isBAIJ) {ierr = PetscStrcmp(mtype,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);}
181   if (!isBAIJ) {ierr = PetscStrcmp(mtype,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);}
182   if (isBAIJ) {
183     dd->w = 1;
184     dd->xs = dd->xs/nc;
185     dd->xe = dd->xe/nc;
186     dd->Xs = dd->Xs/nc;
187     dd->Xe = dd->Xe/nc;
188   }
189 
190   /*
191      We do not provide a getcoloring function in the DMDA operations because
192    the basic DMDA does not know about matrices. We think of DMDA as being more
193    more low-level then matrices.
194   */
195   if (dim == 1) {
196     ierr = DMGetColoring_DA_1d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
197   } else if (dim == 2) {
198     ierr =  DMGetColoring_DA_2d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
199   } else if (dim == 3) {
200     ierr =  DMGetColoring_DA_3d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
201   } else {
202     SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
203   }
204   if (isBAIJ) {
205     dd->w = nc;
206     dd->xs = dd->xs*nc;
207     dd->xe = dd->xe*nc;
208     dd->Xs = dd->Xs*nc;
209     dd->Xe = dd->Xe*nc;
210   }
211   PetscFunctionReturn(0);
212 }
213 
214 /* ---------------------------------------------------------------------------------*/
215 
216 #undef __FUNCT__
217 #define __FUNCT__ "DMGetColoring_DA_2d_MPIAIJ"
218 PetscErrorCode DMGetColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
219 {
220   PetscErrorCode         ierr;
221   PetscInt               xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
222   PetscInt               ncolors;
223   MPI_Comm               comm;
224   DMDAPeriodicType       wrap;
225   DMDAStencilType        st;
226   ISColoringValue        *colors;
227   DM_DA                  *dd = (DM_DA*)da->data;
228 
229   PetscFunctionBegin;
230   /*
231          nc - number of components per grid point
232          col - number of colors needed in one direction for single component problem
233 
234   */
235   ierr = DMDAGetInfo(da,&dim,&m,&n,0,&M,&N,0,&nc,&s,&wrap,&st);CHKERRQ(ierr);
236   col    = 2*s + 1;
237   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
238   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
239   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
240 
241   /* special case as taught to us by Paul Hovland */
242   if (st == DMDA_STENCIL_STAR && s == 1) {
243     ierr = DMGetColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
244   } else {
245 
246     if (DMDAXPeriodic(wrap) && (m % col)){
247       SETERRQ2(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X (%d) is divisible\n\
248                  by 2*stencil_width + 1 (%d)\n", m, col);
249     }
250     if (DMDAYPeriodic(wrap) && (n % col)){
251       SETERRQ2(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y (%d) is divisible\n\
252                  by 2*stencil_width + 1 (%d)\n", n, col);
253     }
254     if (ctype == IS_COLORING_GLOBAL) {
255       if (!dd->localcoloring) {
256 	ierr = PetscMalloc(nc*nx*ny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
257 	ii = 0;
258 	for (j=ys; j<ys+ny; j++) {
259 	  for (i=xs; i<xs+nx; i++) {
260 	    for (k=0; k<nc; k++) {
261 	      colors[ii++] = k + nc*((i % col) + col*(j % col));
262 	    }
263 	  }
264 	}
265         ncolors = nc + nc*(col-1 + col*(col-1));
266 	ierr = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,&dd->localcoloring);CHKERRQ(ierr);
267       }
268       *coloring = dd->localcoloring;
269     } else if (ctype == IS_COLORING_GHOSTED) {
270       if (!dd->ghostedcoloring) {
271 	ierr = PetscMalloc(nc*gnx*gny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
272 	ii = 0;
273 	for (j=gys; j<gys+gny; j++) {
274 	  for (i=gxs; i<gxs+gnx; i++) {
275 	    for (k=0; k<nc; k++) {
276 	      /* the complicated stuff is to handle periodic boundaries */
277 	      colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
278 	    }
279 	  }
280 	}
281         ncolors = nc + nc*(col - 1 + col*(col-1));
282 	ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
283         /* PetscIntView(ncolors,(PetscInt *)colors,0); */
284 
285 	ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
286       }
287       *coloring = dd->ghostedcoloring;
288     } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
289   }
290   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
291   PetscFunctionReturn(0);
292 }
293 
294 /* ---------------------------------------------------------------------------------*/
295 
296 #undef __FUNCT__
297 #define __FUNCT__ "DMGetColoring_DA_3d_MPIAIJ"
298 PetscErrorCode DMGetColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
299 {
300   PetscErrorCode    ierr;
301   PetscInt          xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
302   PetscInt          ncolors;
303   MPI_Comm          comm;
304   DMDAPeriodicType  wrap;
305   DMDAStencilType   st;
306   ISColoringValue   *colors;
307   DM_DA             *dd = (DM_DA*)da->data;
308 
309   PetscFunctionBegin;
310   /*
311          nc - number of components per grid point
312          col - number of colors needed in one direction for single component problem
313 
314   */
315   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&wrap,&st);CHKERRQ(ierr);
316   col    = 2*s + 1;
317   if (DMDAXPeriodic(wrap) && (m % col)){
318     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
319                  by 2*stencil_width + 1\n");
320   }
321   if (DMDAYPeriodic(wrap) && (n % col)){
322     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
323                  by 2*stencil_width + 1\n");
324   }
325   if (DMDAZPeriodic(wrap) && (p % col)){
326     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
327                  by 2*stencil_width + 1\n");
328   }
329 
330   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
331   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
332   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
333 
334   /* create the coloring */
335   if (ctype == IS_COLORING_GLOBAL) {
336     if (!dd->localcoloring) {
337       ierr = PetscMalloc(nc*nx*ny*nz*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
338       ii = 0;
339       for (k=zs; k<zs+nz; k++) {
340         for (j=ys; j<ys+ny; j++) {
341           for (i=xs; i<xs+nx; i++) {
342             for (l=0; l<nc; l++) {
343               colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
344             }
345           }
346         }
347       }
348       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
349       ierr = ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,&dd->localcoloring);CHKERRQ(ierr);
350     }
351     *coloring = dd->localcoloring;
352   } else if (ctype == IS_COLORING_GHOSTED) {
353     if (!dd->ghostedcoloring) {
354       ierr = PetscMalloc(nc*gnx*gny*gnz*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
355       ii = 0;
356       for (k=gzs; k<gzs+gnz; k++) {
357         for (j=gys; j<gys+gny; j++) {
358           for (i=gxs; i<gxs+gnx; i++) {
359             for (l=0; l<nc; l++) {
360               /* the complicated stuff is to handle periodic boundaries */
361               colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
362             }
363           }
364         }
365       }
366       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
367       ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
368       ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
369     }
370     *coloring = dd->ghostedcoloring;
371   } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
372   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
373   PetscFunctionReturn(0);
374 }
375 
376 /* ---------------------------------------------------------------------------------*/
377 
378 #undef __FUNCT__
379 #define __FUNCT__ "DMGetColoring_DA_1d_MPIAIJ"
380 PetscErrorCode DMGetColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
381 {
382   PetscErrorCode    ierr;
383   PetscInt          xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
384   PetscInt          ncolors;
385   MPI_Comm          comm;
386   DMDAPeriodicType  wrap;
387   ISColoringValue   *colors;
388   DM_DA             *dd = (DM_DA*)da->data;
389 
390   PetscFunctionBegin;
391   /*
392          nc - number of components per grid point
393          col - number of colors needed in one direction for single component problem
394 
395   */
396   ierr = DMDAGetInfo(da,&dim,&m,0,0,&M,0,0,&nc,&s,&wrap,0);CHKERRQ(ierr);
397   col    = 2*s + 1;
398 
399   if (DMDAXPeriodic(wrap) && (m % col)) {
400     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points is divisible\n\
401                  by 2*stencil_width + 1\n");
402   }
403 
404   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
405   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
406   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
407 
408   /* create the coloring */
409   if (ctype == IS_COLORING_GLOBAL) {
410     if (!dd->localcoloring) {
411       ierr = PetscMalloc(nc*nx*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
412       i1 = 0;
413       for (i=xs; i<xs+nx; i++) {
414         for (l=0; l<nc; l++) {
415           colors[i1++] = l + nc*(i % col);
416         }
417       }
418       ncolors = nc + nc*(col-1);
419       ierr = ISColoringCreate(comm,ncolors,nc*nx,colors,&dd->localcoloring);CHKERRQ(ierr);
420     }
421     *coloring = dd->localcoloring;
422   } else if (ctype == IS_COLORING_GHOSTED) {
423     if (!dd->ghostedcoloring) {
424       ierr = PetscMalloc(nc*gnx*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
425       i1 = 0;
426       for (i=gxs; i<gxs+gnx; i++) {
427         for (l=0; l<nc; l++) {
428           /* the complicated stuff is to handle periodic boundaries */
429           colors[i1++] = l + nc*(SetInRange(i,m) % col);
430         }
431       }
432       ncolors = nc + nc*(col-1);
433       ierr = ISColoringCreate(comm,ncolors,nc*gnx,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
434       ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
435     }
436     *coloring = dd->ghostedcoloring;
437   } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
438   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
439   PetscFunctionReturn(0);
440 }
441 
442 #undef __FUNCT__
443 #define __FUNCT__ "DMGetColoring_DA_2d_5pt_MPIAIJ"
444 PetscErrorCode DMGetColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
445 {
446   PetscErrorCode    ierr;
447   PetscInt          xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
448   PetscInt          ncolors;
449   MPI_Comm          comm;
450   DMDAPeriodicType  wrap;
451   ISColoringValue   *colors;
452   DM_DA             *dd = (DM_DA*)da->data;
453 
454   PetscFunctionBegin;
455   /*
456          nc - number of components per grid point
457          col - number of colors needed in one direction for single component problem
458 
459   */
460   ierr   = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&wrap,0);CHKERRQ(ierr);
461   ierr   = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
462   ierr   = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
463   ierr   = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
464 
465   if (DMDAXPeriodic(wrap) && (m % 5)){
466     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
467                  by 5\n");
468   }
469   if (DMDAYPeriodic(wrap) && (n % 5)){
470     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
471                  by 5\n");
472   }
473 
474   /* create the coloring */
475   if (ctype == IS_COLORING_GLOBAL) {
476     if (!dd->localcoloring) {
477       ierr = PetscMalloc(nc*nx*ny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
478       ii = 0;
479       for (j=ys; j<ys+ny; j++) {
480 	for (i=xs; i<xs+nx; i++) {
481 	  for (k=0; k<nc; k++) {
482 	    colors[ii++] = k + nc*((3*j+i) % 5);
483 	  }
484 	}
485       }
486       ncolors = 5*nc;
487       ierr = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,&dd->localcoloring);CHKERRQ(ierr);
488     }
489     *coloring = dd->localcoloring;
490   } else if (ctype == IS_COLORING_GHOSTED) {
491     if (!dd->ghostedcoloring) {
492       ierr = PetscMalloc(nc*gnx*gny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
493       ii = 0;
494       for (j=gys; j<gys+gny; j++) {
495 	for (i=gxs; i<gxs+gnx; i++) {
496 	  for (k=0; k<nc; k++) {
497 	    colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
498 	  }
499 	}
500       }
501       ncolors = 5*nc;
502       ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
503       ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
504     }
505     *coloring = dd->ghostedcoloring;
506   } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
507   PetscFunctionReturn(0);
508 }
509 
510 /* =========================================================================== */
511 extern PetscErrorCode DMGetMatrix_DA_1d_MPIAIJ(DM,Mat);
512 extern PetscErrorCode DMGetMatrix_DA_2d_MPIAIJ(DM,Mat);
513 extern PetscErrorCode DMGetMatrix_DA_2d_MPIAIJ_Fill(DM,Mat);
514 extern PetscErrorCode DMGetMatrix_DA_3d_MPIAIJ(DM,Mat);
515 extern PetscErrorCode DMGetMatrix_DA_3d_MPIAIJ_Fill(DM,Mat);
516 extern PetscErrorCode DMGetMatrix_DA_2d_MPIBAIJ(DM,Mat);
517 extern PetscErrorCode DMGetMatrix_DA_3d_MPIBAIJ(DM,Mat);
518 extern PetscErrorCode DMGetMatrix_DA_2d_MPISBAIJ(DM,Mat);
519 extern PetscErrorCode DMGetMatrix_DA_3d_MPISBAIJ(DM,Mat);
520 
521 #undef __FUNCT__
522 #define __FUNCT__ "MatSetDA"
523 /*@
524    MatSetDA - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix
525 
526    Logically Collective on Mat
527 
528    Input Parameters:
529 +  mat - the matrix
530 -  da - the da
531 
532    Level: intermediate
533 
534 @*/
535 PetscErrorCode PETSCDM_DLLEXPORT MatSetDA(Mat mat,DM da)
536 {
537   PetscErrorCode ierr;
538 
539   PetscFunctionBegin;
540   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
541   PetscValidHeaderSpecific(da,DM_CLASSID,1);
542   ierr = PetscTryMethod(mat,"MatSetDA_C",(Mat,DM),(mat,da));CHKERRQ(ierr);
543   PetscFunctionReturn(0);
544 }
545 
546 EXTERN_C_BEGIN
547 #undef __FUNCT__
548 #define __FUNCT__ "MatView_MPI_DA"
549 PetscErrorCode PETSCDM_DLLEXPORT MatView_MPI_DA(Mat A,PetscViewer viewer)
550 {
551   DM             da;
552   PetscErrorCode ierr;
553   const char     *prefix;
554   Mat            Anatural;
555   AO             ao;
556   PetscInt       rstart,rend,*petsc,i;
557   IS             is;
558   MPI_Comm       comm;
559 
560   PetscFunctionBegin;
561   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
562   ierr = PetscObjectQuery((PetscObject)A,"DMDA",(PetscObject*)&da);CHKERRQ(ierr);
563   if (!da) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
564 
565   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
566   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
567   ierr = PetscMalloc((rend-rstart)*sizeof(PetscInt),&petsc);CHKERRQ(ierr);
568   for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
569   ierr = AOApplicationToPetsc(ao,rend-rstart,petsc);CHKERRQ(ierr);
570   ierr = ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
571 
572   /* call viewer on natural ordering */
573   ierr = MatGetSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);CHKERRQ(ierr);
574   ierr = ISDestroy(is);CHKERRQ(ierr);
575   ierr = PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);CHKERRQ(ierr);
576   ierr = PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);CHKERRQ(ierr);
577   ierr = PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);CHKERRQ(ierr);
578   ierr = MatView(Anatural,viewer);CHKERRQ(ierr);
579   ierr = MatDestroy(Anatural);CHKERRQ(ierr);
580   PetscFunctionReturn(0);
581 }
582 EXTERN_C_END
583 
584 EXTERN_C_BEGIN
585 #undef __FUNCT__
586 #define __FUNCT__ "MatLoad_MPI_DA"
587 PetscErrorCode PETSCDM_DLLEXPORT MatLoad_MPI_DA(Mat A,PetscViewer viewer)
588 {
589   DM             da;
590   PetscErrorCode ierr;
591   Mat            Anatural,Aapp;
592   AO             ao;
593   PetscInt       rstart,rend,*app,i;
594   IS             is;
595   MPI_Comm       comm;
596 
597   PetscFunctionBegin;
598   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
599   ierr = PetscObjectQuery((PetscObject)A,"DMDA",(PetscObject*)&da);CHKERRQ(ierr);
600   if (!da) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
601 
602   /* Load the matrix in natural ordering */
603   ierr = MatCreate(((PetscObject)A)->comm,&Anatural);CHKERRQ(ierr);
604   ierr = MatSetType(Anatural,((PetscObject)A)->type_name);CHKERRQ(ierr);
605   ierr = MatSetSizes(Anatural,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
606   ierr = MatLoad(Anatural,viewer);CHKERRQ(ierr);
607 
608   /* Map natural ordering to application ordering and create IS */
609   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
610   ierr = MatGetOwnershipRange(Anatural,&rstart,&rend);CHKERRQ(ierr);
611   ierr = PetscMalloc((rend-rstart)*sizeof(PetscInt),&app);CHKERRQ(ierr);
612   for (i=rstart; i<rend; i++) app[i-rstart] = i;
613   ierr = AOPetscToApplication(ao,rend-rstart,app);CHKERRQ(ierr);
614   ierr = ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
615 
616   /* Do permutation and replace header */
617   ierr = MatGetSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);CHKERRQ(ierr);
618   ierr = MatHeaderReplace(A,Aapp);CHKERRQ(ierr);
619   ierr = ISDestroy(is);CHKERRQ(ierr);
620   ierr = MatDestroy(Anatural);CHKERRQ(ierr);
621   PetscFunctionReturn(0);
622 }
623 EXTERN_C_END
624 
625 #undef __FUNCT__
626 #define __FUNCT__ "DMGetMatrix_DA"
627 PetscErrorCode PETSCDM_DLLEXPORT DMGetMatrix_DA(DM da, const MatType mtype,Mat *J)
628 {
629   PetscErrorCode ierr;
630   PetscInt       dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
631   Mat            A;
632   MPI_Comm       comm;
633   const MatType  Atype;
634   void           (*aij)(void)=PETSC_NULL,(*baij)(void)=PETSC_NULL,(*sbaij)(void)=PETSC_NULL;
635   MatType        ttype[256];
636   PetscBool      flg;
637   PetscMPIInt    size;
638   DM_DA          *dd = (DM_DA*)da->data;
639 
640   PetscFunctionBegin;
641 #ifndef PETSC_USE_DYNAMIC_LIBRARIES
642   ierr = MatInitializePackage(PETSC_NULL);CHKERRQ(ierr);
643 #endif
644   if (!mtype) mtype = MATAIJ;
645   ierr = PetscStrcpy((char*)ttype,mtype);CHKERRQ(ierr);
646   ierr = PetscOptionsBegin(((PetscObject)da)->comm,((PetscObject)da)->prefix,"DMDA options","Mat");CHKERRQ(ierr);
647   ierr = PetscOptionsList("-da_mat_type","Matrix type","MatSetType",MatList,mtype,(char*)ttype,256,&flg);CHKERRQ(ierr);
648   ierr = PetscOptionsEnd();
649 
650   /*
651                                   m
652           ------------------------------------------------------
653          |                                                     |
654          |                                                     |
655          |               ----------------------                |
656          |               |                    |                |
657       n  |           ny  |                    |                |
658          |               |                    |                |
659          |               .---------------------                |
660          |             (xs,ys)     nx                          |
661          |            .                                        |
662          |         (gxs,gys)                                   |
663          |                                                     |
664           -----------------------------------------------------
665   */
666 
667   /*
668          nc - number of components per grid point
669          col - number of colors needed in one direction for single component problem
670 
671   */
672   ierr = DMDAGetInfo(da,&dim,&M,&N,&P,0,0,0,&dof,0,0,0);CHKERRQ(ierr);
673   ierr = DMDAGetCorners(da,0,0,0,&nx,&ny,&nz);CHKERRQ(ierr);
674   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
675   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
676   ierr = MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);CHKERRQ(ierr);
677   ierr = MatSetType(A,(const MatType)ttype);CHKERRQ(ierr);
678   ierr = MatSetDA(A,da);CHKERRQ(ierr);
679   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
680   ierr = MatGetType(A,&Atype);CHKERRQ(ierr);
681   /*
682      We do not provide a getmatrix function in the DMDA operations because
683    the basic DMDA does not know about matrices. We think of DMDA as being more
684    more low-level than matrices. This is kind of cheating but, cause sometimes
685    we think of DMDA has higher level than matrices.
686 
687      We could switch based on Atype (or mtype), but we do not since the
688    specialized setting routines depend only the particular preallocation
689    details of the matrix, not the type itself.
690   */
691   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
692   if (!aij) {
693     ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
694   }
695   if (!aij) {
696     ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
697     if (!baij) {
698       ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
699     }
700     if (!baij){
701       ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
702       if (!sbaij) {
703         ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
704       }
705     }
706   }
707   if (aij) {
708     if (dim == 1) {
709       ierr = DMGetMatrix_DA_1d_MPIAIJ(da,A);CHKERRQ(ierr);
710     } else if (dim == 2) {
711       if (dd->ofill) {
712         ierr = DMGetMatrix_DA_2d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
713       } else {
714         ierr = DMGetMatrix_DA_2d_MPIAIJ(da,A);CHKERRQ(ierr);
715       }
716     } else if (dim == 3) {
717       if (dd->ofill) {
718         ierr = DMGetMatrix_DA_3d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
719       } else {
720         ierr = DMGetMatrix_DA_3d_MPIAIJ(da,A);CHKERRQ(ierr);
721       }
722     }
723   } else if (baij) {
724     if (dim == 2) {
725       ierr = DMGetMatrix_DA_2d_MPIBAIJ(da,A);CHKERRQ(ierr);
726     } else if (dim == 3) {
727       ierr = DMGetMatrix_DA_3d_MPIBAIJ(da,A);CHKERRQ(ierr);
728     } else {
729       SETERRQ3(((PetscObject)da)->comm,PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension!\n" \
730 	       "Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
731     }
732   } else if (sbaij) {
733     if (dim == 2) {
734       ierr = DMGetMatrix_DA_2d_MPISBAIJ(da,A);CHKERRQ(ierr);
735     } else if (dim == 3) {
736       ierr = DMGetMatrix_DA_3d_MPISBAIJ(da,A);CHKERRQ(ierr);
737     } else {
738       SETERRQ3(((PetscObject)da)->comm,PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension!\n" \
739 	       "Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
740     }
741   }
742   ierr = DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);CHKERRQ(ierr);
743   ierr = MatSetStencil(A,dim,dims,starts,dof);CHKERRQ(ierr);
744   ierr = PetscObjectCompose((PetscObject)A,"DMDA",(PetscObject)da);CHKERRQ(ierr);
745   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
746   if (size > 1) {
747     /* change viewer to display matrix in natural ordering */
748     ierr = MatShellSetOperation(A, MATOP_VIEW, (void (*)(void)) MatView_MPI_DA);CHKERRQ(ierr);
749     ierr = MatShellSetOperation(A, MATOP_LOAD, (void (*)(void)) MatLoad_MPI_DA);CHKERRQ(ierr);
750   }
751   *J = A;
752   PetscFunctionReturn(0);
753 }
754 
755 /* ---------------------------------------------------------------------------------*/
756 #undef __FUNCT__
757 #define __FUNCT__ "DMGetMatrix_DA_2d_MPIAIJ"
758 PetscErrorCode DMGetMatrix_DA_2d_MPIAIJ(DM da,Mat J)
759 {
760   PetscErrorCode         ierr;
761   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = PETSC_NULL,k,nc,*rows = PETSC_NULL,col,cnt,l,p;
762   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
763   MPI_Comm               comm;
764   PetscScalar            *values;
765   DMDAPeriodicType       wrap;
766   ISLocalToGlobalMapping ltog,ltogb;
767   DMDAStencilType        st;
768   DM_DA                  *dd = (DM_DA*)da->data;
769 
770   PetscFunctionBegin;
771   /*
772          nc - number of components per grid point
773          col - number of colors needed in one direction for single component problem
774 
775   */
776   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&wrap,&st);CHKERRQ(ierr);
777   col = 2*s + 1;
778   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
779   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
780   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
781 
782   ierr = PetscMalloc2(nc,PetscInt,&rows,col*col*nc*nc,PetscInt,&cols);CHKERRQ(ierr);
783   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
784   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
785 
786   /* determine the matrix preallocation information */
787   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
788   for (i=xs; i<xs+nx; i++) {
789 
790     pstart = DMDAXPeriodic(wrap) ? -s : (PetscMax(-s,-i));
791     pend   = DMDAXPeriodic(wrap) ?  s : (PetscMin(s,m-i-1));
792 
793     for (j=ys; j<ys+ny; j++) {
794       slot = i - gxs + gnx*(j - gys);
795 
796       lstart = DMDAYPeriodic(wrap) ? -s : (PetscMax(-s,-j));
797       lend   = DMDAYPeriodic(wrap) ?  s : (PetscMin(s,n-j-1));
798 
799       cnt  = 0;
800       for (k=0; k<nc; k++) {
801 	for (l=lstart; l<lend+1; l++) {
802 	  for (p=pstart; p<pend+1; p++) {
803 	    if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
804 	      cols[cnt++]  = k + nc*(slot + gnx*l + p);
805 	    }
806 	  }
807 	}
808 	rows[k] = k + nc*(slot);
809       }
810       ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
811     }
812   }
813   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
814   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
815   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
816   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
817 
818   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
819   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
820 
821   /*
822     For each node in the grid: we get the neighbors in the local (on processor ordering
823     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
824     PETSc ordering.
825   */
826   if (!dd->prealloc_only) {
827     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
828     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
829     for (i=xs; i<xs+nx; i++) {
830 
831       pstart = DMDAXPeriodic(wrap) ? -s : (PetscMax(-s,-i));
832       pend   = DMDAXPeriodic(wrap) ?  s : (PetscMin(s,m-i-1));
833 
834       for (j=ys; j<ys+ny; j++) {
835 	slot = i - gxs + gnx*(j - gys);
836 
837 	lstart = DMDAYPeriodic(wrap) ? -s : (PetscMax(-s,-j));
838 	lend   = DMDAYPeriodic(wrap) ?  s : (PetscMin(s,n-j-1));
839 
840 	cnt  = 0;
841 	for (k=0; k<nc; k++) {
842 	  for (l=lstart; l<lend+1; l++) {
843 	    for (p=pstart; p<pend+1; p++) {
844 	      if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
845 		cols[cnt++]  = k + nc*(slot + gnx*l + p);
846 	      }
847 	    }
848 	  }
849 	  rows[k]      = k + nc*(slot);
850 	}
851 	ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
852       }
853     }
854     ierr = PetscFree(values);CHKERRQ(ierr);
855     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
856     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
857   }
858   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
859   PetscFunctionReturn(0);
860 }
861 
862 #undef __FUNCT__
863 #define __FUNCT__ "DMGetMatrix_DA_2d_MPIAIJ_Fill"
864 PetscErrorCode DMGetMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)
865 {
866   PetscErrorCode         ierr;
867   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
868   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,l,p;
869   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
870   DM_DA                  *dd = (DM_DA*)da->data;
871   PetscInt               ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
872   MPI_Comm               comm;
873   PetscScalar            *values;
874   DMDAPeriodicType       wrap;
875   ISLocalToGlobalMapping ltog,ltogb;
876   DMDAStencilType        st;
877 
878   PetscFunctionBegin;
879   /*
880          nc - number of components per grid point
881          col - number of colors needed in one direction for single component problem
882 
883   */
884   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&wrap,&st);CHKERRQ(ierr);
885   col = 2*s + 1;
886   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
887   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
888   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
889 
890   ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
891   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
892   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
893 
894   /* determine the matrix preallocation information */
895   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
896   for (i=xs; i<xs+nx; i++) {
897 
898     pstart = DMDAXPeriodic(wrap) ? -s : (PetscMax(-s,-i));
899     pend   = DMDAXPeriodic(wrap) ?  s : (PetscMin(s,m-i-1));
900 
901     for (j=ys; j<ys+ny; j++) {
902       slot = i - gxs + gnx*(j - gys);
903 
904       lstart = DMDAYPeriodic(wrap) ? -s : (PetscMax(-s,-j));
905       lend   = DMDAYPeriodic(wrap) ?  s : (PetscMin(s,n-j-1));
906 
907       for (k=0; k<nc; k++) {
908         cnt  = 0;
909 	for (l=lstart; l<lend+1; l++) {
910 	  for (p=pstart; p<pend+1; p++) {
911             if (l || p) {
912 	      if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
913                 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++)
914 		  cols[cnt++]  = ofill[ifill_col] + nc*(slot + gnx*l + p);
915 	      }
916             } else {
917 	      if (dfill) {
918 		for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++)
919 		  cols[cnt++]  = dfill[ifill_col] + nc*(slot + gnx*l + p);
920 	      } else {
921 		for (ifill_col=0; ifill_col<nc; ifill_col++)
922 		  cols[cnt++]  = ifill_col + nc*(slot + gnx*l + p);
923 	      }
924             }
925 	  }
926 	}
927 	row = k + nc*(slot);
928         ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
929       }
930     }
931   }
932   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
933   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
934   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
935   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
936   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
937 
938   /*
939     For each node in the grid: we get the neighbors in the local (on processor ordering
940     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
941     PETSc ordering.
942   */
943   if (!dd->prealloc_only) {
944     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
945     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
946     for (i=xs; i<xs+nx; i++) {
947 
948       pstart = DMDAXPeriodic(wrap) ? -s : (PetscMax(-s,-i));
949       pend   = DMDAXPeriodic(wrap) ?  s : (PetscMin(s,m-i-1));
950 
951       for (j=ys; j<ys+ny; j++) {
952 	slot = i - gxs + gnx*(j - gys);
953 
954 	lstart = DMDAYPeriodic(wrap) ? -s : (PetscMax(-s,-j));
955 	lend   = DMDAYPeriodic(wrap) ?  s : (PetscMin(s,n-j-1));
956 
957 	for (k=0; k<nc; k++) {
958 	  cnt  = 0;
959 	  for (l=lstart; l<lend+1; l++) {
960 	    for (p=pstart; p<pend+1; p++) {
961 	      if (l || p) {
962 		if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
963 		  for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++)
964 		    cols[cnt++]  = ofill[ifill_col] + nc*(slot + gnx*l + p);
965 		}
966 	      } else {
967 		if (dfill) {
968 		  for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++)
969 		    cols[cnt++]  = dfill[ifill_col] + nc*(slot + gnx*l + p);
970 		} else {
971 		  for (ifill_col=0; ifill_col<nc; ifill_col++)
972 		    cols[cnt++]  = ifill_col + nc*(slot + gnx*l + p);
973 		}
974 	      }
975 	    }
976 	  }
977 	  row  = k + nc*(slot);
978 	  ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
979 	}
980       }
981     }
982     ierr = PetscFree(values);CHKERRQ(ierr);
983     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
984     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
985   }
986   ierr = PetscFree(cols);CHKERRQ(ierr);
987   PetscFunctionReturn(0);
988 }
989 
990 /* ---------------------------------------------------------------------------------*/
991 
992 #undef __FUNCT__
993 #define __FUNCT__ "DMGetMatrix_DA_3d_MPIAIJ"
994 PetscErrorCode DMGetMatrix_DA_3d_MPIAIJ(DM da,Mat J)
995 {
996   PetscErrorCode         ierr;
997   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
998   PetscInt               m,n,dim,s,*cols = PETSC_NULL,k,nc,*rows = PETSC_NULL,col,cnt,l,p,*dnz = PETSC_NULL,*onz = PETSC_NULL;
999   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1000   MPI_Comm               comm;
1001   PetscScalar            *values;
1002   DMDAPeriodicType       wrap;
1003   ISLocalToGlobalMapping ltog,ltogb;
1004   DMDAStencilType        st;
1005   DM_DA                  *dd = (DM_DA*)da->data;
1006 
1007   PetscFunctionBegin;
1008   /*
1009          nc - number of components per grid point
1010          col - number of colors needed in one direction for single component problem
1011 
1012   */
1013   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&wrap,&st);CHKERRQ(ierr);
1014   col    = 2*s + 1;
1015 
1016   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1017   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1018   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1019 
1020   ierr = PetscMalloc2(nc,PetscInt,&rows,col*col*col*nc*nc,PetscInt,&cols);CHKERRQ(ierr);
1021   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1022   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1023 
1024   /* determine the matrix preallocation information */
1025   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1026   for (i=xs; i<xs+nx; i++) {
1027     istart = DMDAXPeriodic(wrap) ? -s : (PetscMax(-s,-i));
1028     iend   = DMDAXPeriodic(wrap) ?  s : (PetscMin(s,m-i-1));
1029     for (j=ys; j<ys+ny; j++) {
1030       jstart = DMDAYPeriodic(wrap) ? -s : (PetscMax(-s,-j));
1031       jend   = DMDAYPeriodic(wrap) ?  s : (PetscMin(s,n-j-1));
1032       for (k=zs; k<zs+nz; k++) {
1033 	kstart = DMDAZPeriodic(wrap) ? -s : (PetscMax(-s,-k));
1034 	kend   = DMDAZPeriodic(wrap) ?  s : (PetscMin(s,p-k-1));
1035 
1036 	slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1037 
1038 	cnt  = 0;
1039 	for (l=0; l<nc; l++) {
1040 	  for (ii=istart; ii<iend+1; ii++) {
1041 	    for (jj=jstart; jj<jend+1; jj++) {
1042 	      for (kk=kstart; kk<kend+1; kk++) {
1043 		if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1044 		  cols[cnt++]  = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1045 		}
1046 	      }
1047 	    }
1048 	  }
1049 	  rows[l] = l + nc*(slot);
1050 	}
1051 	ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1052       }
1053     }
1054   }
1055   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1056   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1057   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1058   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1059   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1060   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1061 
1062   /*
1063     For each node in the grid: we get the neighbors in the local (on processor ordering
1064     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1065     PETSc ordering.
1066   */
1067   if (!dd->prealloc_only) {
1068     ierr = PetscMalloc(col*col*col*nc*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1069     ierr = PetscMemzero(values,col*col*col*nc*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1070     for (i=xs; i<xs+nx; i++) {
1071       istart = DMDAXPeriodic(wrap) ? -s : (PetscMax(-s,-i));
1072       iend   = DMDAXPeriodic(wrap) ?  s : (PetscMin(s,m-i-1));
1073       for (j=ys; j<ys+ny; j++) {
1074 	jstart = DMDAYPeriodic(wrap) ? -s : (PetscMax(-s,-j));
1075 	jend   = DMDAYPeriodic(wrap) ?  s : (PetscMin(s,n-j-1));
1076 	for (k=zs; k<zs+nz; k++) {
1077 	  kstart = DMDAZPeriodic(wrap) ? -s : (PetscMax(-s,-k));
1078 	  kend   = DMDAZPeriodic(wrap) ?  s : (PetscMin(s,p-k-1));
1079 
1080 	  slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1081 
1082 	  cnt  = 0;
1083 	  for (l=0; l<nc; l++) {
1084 	    for (ii=istart; ii<iend+1; ii++) {
1085 	      for (jj=jstart; jj<jend+1; jj++) {
1086 		for (kk=kstart; kk<kend+1; kk++) {
1087 		  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1088 		    cols[cnt++]  = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1089 		  }
1090 		}
1091 	      }
1092 	    }
1093 	    rows[l]      = l + nc*(slot);
1094 	  }
1095 	  ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1096 	}
1097       }
1098     }
1099     ierr = PetscFree(values);CHKERRQ(ierr);
1100     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1101     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1102   }
1103   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 /* ---------------------------------------------------------------------------------*/
1108 
1109 #undef __FUNCT__
1110 #define __FUNCT__ "DMGetMatrix_DA_1d_MPIAIJ"
1111 PetscErrorCode DMGetMatrix_DA_1d_MPIAIJ(DM da,Mat J)
1112 {
1113   PetscErrorCode         ierr;
1114   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1115   PetscInt               m,dim,s,*cols = PETSC_NULL,nc,*rows = PETSC_NULL,col,cnt,l;
1116   PetscInt               istart,iend;
1117   PetscScalar            *values;
1118   DMDAPeriodicType       wrap;
1119   ISLocalToGlobalMapping ltog,ltogb;
1120   DM_DA                  *dd = (DM_DA*)da->data;
1121 
1122   PetscFunctionBegin;
1123   /*
1124          nc - number of components per grid point
1125          col - number of colors needed in one direction for single component problem
1126 
1127   */
1128   ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&wrap,0);CHKERRQ(ierr);
1129   col    = 2*s + 1;
1130 
1131   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
1132   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
1133 
1134   ierr = MatSeqAIJSetPreallocation(J,col*nc,0);CHKERRQ(ierr);
1135   ierr = MatMPIAIJSetPreallocation(J,col*nc,0,col*nc,0);CHKERRQ(ierr);
1136   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1137   ierr = PetscMalloc2(nc,PetscInt,&rows,col*nc*nc,PetscInt,&cols);CHKERRQ(ierr);
1138 
1139   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1140   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1141   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1142   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1143 
1144   /*
1145     For each node in the grid: we get the neighbors in the local (on processor ordering
1146     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1147     PETSc ordering.
1148   */
1149   if (!dd->prealloc_only) {
1150     ierr = PetscMalloc(col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1151     ierr = PetscMemzero(values,col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1152     for (i=xs; i<xs+nx; i++) {
1153       istart = PetscMax(-s,gxs - i);
1154       iend   = PetscMin(s,gxs + gnx - i - 1);
1155       slot   = i - gxs;
1156 
1157       cnt  = 0;
1158       for (l=0; l<nc; l++) {
1159 	for (i1=istart; i1<iend+1; i1++) {
1160 	  cols[cnt++] = l + nc*(slot + i1);
1161 	}
1162 	rows[l]      = l + nc*(slot);
1163       }
1164       ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1165     }
1166     ierr = PetscFree(values);CHKERRQ(ierr);
1167     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1168     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1169   }
1170   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1171   PetscFunctionReturn(0);
1172 }
1173 
1174 #undef __FUNCT__
1175 #define __FUNCT__ "DMGetMatrix_DA_2d_MPIBAIJ"
1176 PetscErrorCode DMGetMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
1177 {
1178   PetscErrorCode         ierr;
1179   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1180   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1181   PetscInt               istart,iend,jstart,jend,ii,jj;
1182   MPI_Comm               comm;
1183   PetscScalar            *values;
1184   DMDAPeriodicType       wrap;
1185   DMDAStencilType        st;
1186   ISLocalToGlobalMapping ltog,ltogb;
1187   DM_DA                  *dd = (DM_DA*)da->data;
1188 
1189   PetscFunctionBegin;
1190   /*
1191      nc - number of components per grid point
1192      col - number of colors needed in one direction for single component problem
1193   */
1194   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&wrap,&st);CHKERRQ(ierr);
1195   col = 2*s + 1;
1196 
1197   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1198   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1199   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1200 
1201   ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1202 
1203   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1204   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1205 
1206   /* determine the matrix preallocation information */
1207   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1208   for (i=xs; i<xs+nx; i++) {
1209     istart = DMDAXPeriodic(wrap) ? -s : (PetscMax(-s,-i));
1210     iend   = DMDAXPeriodic(wrap) ?  s : (PetscMin(s,m-i-1));
1211     for (j=ys; j<ys+ny; j++) {
1212       jstart = DMDAYPeriodic(wrap) ? -s : (PetscMax(-s,-j));
1213       jend   = DMDAYPeriodic(wrap) ?  s : (PetscMin(s,n-j-1));
1214       slot = i - gxs + gnx*(j - gys);
1215 
1216       /* Find block columns in block row */
1217       cnt  = 0;
1218       for (ii=istart; ii<iend+1; ii++) {
1219         for (jj=jstart; jj<jend+1; jj++) {
1220           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1221             cols[cnt++]  = slot + ii + gnx*jj;
1222           }
1223         }
1224       }
1225       ierr = MatPreallocateSetLocal(ltogb,1,&slot,ltogb,cnt,cols,dnz,onz);CHKERRQ(ierr);
1226     }
1227   }
1228   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1229   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1230   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1231 
1232   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1233   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1234 
1235   /*
1236     For each node in the grid: we get the neighbors in the local (on processor ordering
1237     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1238     PETSc ordering.
1239   */
1240   if (!dd->prealloc_only) {
1241     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1242     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1243     for (i=xs; i<xs+nx; i++) {
1244       istart = DMDAXPeriodic(wrap) ? -s : (PetscMax(-s,-i));
1245       iend   = DMDAXPeriodic(wrap) ?  s : (PetscMin(s,m-i-1));
1246       for (j=ys; j<ys+ny; j++) {
1247 	jstart = DMDAYPeriodic(wrap) ? -s : (PetscMax(-s,-j));
1248 	jend   = DMDAYPeriodic(wrap) ?  s : (PetscMin(s,n-j-1));
1249 	slot = i - gxs + gnx*(j - gys);
1250 	cnt  = 0;
1251         for (ii=istart; ii<iend+1; ii++) {
1252           for (jj=jstart; jj<jend+1; jj++) {
1253             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1254               cols[cnt++]  = slot + ii + gnx*jj;
1255             }
1256           }
1257         }
1258 	ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1259       }
1260     }
1261     ierr = PetscFree(values);CHKERRQ(ierr);
1262     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1263     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1264   }
1265   ierr = PetscFree(cols);CHKERRQ(ierr);
1266   PetscFunctionReturn(0);
1267 }
1268 
1269 #undef __FUNCT__
1270 #define __FUNCT__ "DMGetMatrix_DA_3d_MPIBAIJ"
1271 PetscErrorCode DMGetMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
1272 {
1273   PetscErrorCode         ierr;
1274   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1275   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1276   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1277   MPI_Comm               comm;
1278   PetscScalar            *values;
1279   DMDAPeriodicType       wrap;
1280   DMDAStencilType        st;
1281   ISLocalToGlobalMapping ltog,ltogb;
1282   DM_DA                  *dd = (DM_DA*)da->data;
1283 
1284   PetscFunctionBegin;
1285   /*
1286          nc - number of components per grid point
1287          col - number of colors needed in one direction for single component problem
1288 
1289   */
1290   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&wrap,&st);CHKERRQ(ierr);
1291   col    = 2*s + 1;
1292 
1293   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1294   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1295   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1296 
1297   ierr  = PetscMalloc(col*col*col*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1298 
1299   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1300   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1301 
1302   /* determine the matrix preallocation information */
1303   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1304   for (i=xs; i<xs+nx; i++) {
1305     istart = DMDAXPeriodic(wrap) ? -s : (PetscMax(-s,-i));
1306     iend   = DMDAXPeriodic(wrap) ?  s : (PetscMin(s,m-i-1));
1307     for (j=ys; j<ys+ny; j++) {
1308       jstart = DMDAYPeriodic(wrap) ? -s : (PetscMax(-s,-j));
1309       jend   = DMDAYPeriodic(wrap) ?  s : (PetscMin(s,n-j-1));
1310       for (k=zs; k<zs+nz; k++) {
1311 	kstart = DMDAZPeriodic(wrap) ? -s : (PetscMax(-s,-k));
1312 	kend   = DMDAZPeriodic(wrap) ?  s : (PetscMin(s,p-k-1));
1313 
1314 	slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1315 
1316 	/* Find block columns in block row */
1317 	cnt  = 0;
1318         for (ii=istart; ii<iend+1; ii++) {
1319           for (jj=jstart; jj<jend+1; jj++) {
1320             for (kk=kstart; kk<kend+1; kk++) {
1321               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1322 		cols[cnt++]  = slot + ii + gnx*jj + gnx*gny*kk;
1323 	      }
1324 	    }
1325 	  }
1326 	}
1327 	ierr = MatPreallocateSetLocal(ltogb,1,&slot,ltogb,cnt,cols,dnz,onz);CHKERRQ(ierr);
1328       }
1329     }
1330   }
1331   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1332   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1333   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1334 
1335   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1336   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1337 
1338   /*
1339     For each node in the grid: we get the neighbors in the local (on processor ordering
1340     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1341     PETSc ordering.
1342   */
1343   if (!dd->prealloc_only) {
1344     ierr  = PetscMalloc(col*col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1345     ierr  = PetscMemzero(values,col*col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1346     for (i=xs; i<xs+nx; i++) {
1347       istart = DMDAXPeriodic(wrap) ? -s : (PetscMax(-s,-i));
1348       iend   = DMDAXPeriodic(wrap) ?  s : (PetscMin(s,m-i-1));
1349       for (j=ys; j<ys+ny; j++) {
1350 	jstart = DMDAYPeriodic(wrap) ? -s : (PetscMax(-s,-j));
1351 	jend   = DMDAYPeriodic(wrap) ?  s : (PetscMin(s,n-j-1));
1352 	for (k=zs; k<zs+nz; k++) {
1353 	  kstart = DMDAZPeriodic(wrap) ? -s : (PetscMax(-s,-k));
1354 	  kend   = DMDAZPeriodic(wrap) ?  s : (PetscMin(s,p-k-1));
1355 
1356 	  slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1357 
1358 	  cnt  = 0;
1359           for (ii=istart; ii<iend+1; ii++) {
1360             for (jj=jstart; jj<jend+1; jj++) {
1361               for (kk=kstart; kk<kend+1; kk++) {
1362                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1363                   cols[cnt++]  = slot + ii + gnx*jj + gnx*gny*kk;
1364                 }
1365               }
1366             }
1367           }
1368 	  ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1369 	}
1370       }
1371     }
1372     ierr = PetscFree(values);CHKERRQ(ierr);
1373     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1374     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1375   }
1376   ierr = PetscFree(cols);CHKERRQ(ierr);
1377   PetscFunctionReturn(0);
1378 }
1379 
1380 #undef __FUNCT__
1381 #define __FUNCT__ "L2GFilterUpperTriangular"
1382 /*
1383   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
1384   identify in the local ordering with periodic domain.
1385 */
1386 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[])
1387 {
1388   PetscErrorCode ierr;
1389   PetscInt       i,n;
1390 
1391   PetscFunctionBegin;
1392   ierr = ISLocalToGlobalMappingApply(ltog,1,row,row);CHKERRQ(ierr);
1393   ierr = ISLocalToGlobalMappingApply(ltog,*cnt,col,col);CHKERRQ(ierr);
1394   for (i=0,n=0; i<*cnt; i++) {
1395     if (col[i] >= *row) col[n++] = col[i];
1396   }
1397   *cnt = n;
1398   PetscFunctionReturn(0);
1399 }
1400 
1401 #undef __FUNCT__
1402 #define __FUNCT__ "DMGetMatrix_DA_2d_MPISBAIJ"
1403 PetscErrorCode DMGetMatrix_DA_2d_MPISBAIJ(DM da,Mat J)
1404 {
1405   PetscErrorCode         ierr;
1406   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1407   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1408   PetscInt               istart,iend,jstart,jend,ii,jj;
1409   MPI_Comm               comm;
1410   PetscScalar            *values;
1411   DMDAPeriodicType       wrap;
1412   DMDAStencilType        st;
1413   ISLocalToGlobalMapping ltog,ltogb;
1414   DM_DA                  *dd = (DM_DA*)da->data;
1415 
1416   PetscFunctionBegin;
1417   /*
1418      nc - number of components per grid point
1419      col - number of colors needed in one direction for single component problem
1420   */
1421   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&wrap,&st);CHKERRQ(ierr);
1422   col = 2*s + 1;
1423 
1424   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1425   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1426   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1427 
1428   ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1429 
1430   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1431   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1432 
1433   /* determine the matrix preallocation information */
1434   ierr = MatPreallocateSymmetricInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1435   for (i=xs; i<xs+nx; i++) {
1436     istart = DMDAXPeriodic(wrap) ? -s : (PetscMax(-s,-i));
1437     iend   = DMDAXPeriodic(wrap) ?  s : (PetscMin(s,m-i-1));
1438     for (j=ys; j<ys+ny; j++) {
1439       jstart = DMDAYPeriodic(wrap) ? -s : (PetscMax(-s,-j));
1440       jend   = DMDAYPeriodic(wrap) ?  s : (PetscMin(s,n-j-1));
1441       slot = i - gxs + gnx*(j - gys);
1442 
1443       /* Find block columns in block row */
1444       cnt  = 0;
1445       for (ii=istart; ii<iend+1; ii++) {
1446         for (jj=jstart; jj<jend+1; jj++) {
1447           if (st == DMDA_STENCIL_BOX || !ii || !jj) {
1448             cols[cnt++]  = slot + ii + gnx*jj;
1449           }
1450         }
1451       }
1452       ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1453       ierr = MatPreallocateSymmetricSet(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
1454     }
1455   }
1456   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1457   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1458   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1459 
1460   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1461   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1462 
1463   /*
1464     For each node in the grid: we get the neighbors in the local (on processor ordering
1465     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1466     PETSc ordering.
1467   */
1468   if (!dd->prealloc_only) {
1469     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1470     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1471     for (i=xs; i<xs+nx; i++) {
1472       istart = DMDAXPeriodic(wrap) ? -s : (PetscMax(-s,-i));
1473       iend   = DMDAXPeriodic(wrap) ?  s : (PetscMin(s,m-i-1));
1474       for (j=ys; j<ys+ny; j++) {
1475         jstart = DMDAYPeriodic(wrap) ? -s : (PetscMax(-s,-j));
1476         jend   = DMDAYPeriodic(wrap) ?  s : (PetscMin(s,n-j-1));
1477         slot = i - gxs + gnx*(j - gys);
1478 
1479         /* Find block columns in block row */
1480         cnt  = 0;
1481         for (ii=istart; ii<iend+1; ii++) {
1482           for (jj=jstart; jj<jend+1; jj++) {
1483             if (st == DMDA_STENCIL_BOX || !ii || !jj) {
1484               cols[cnt++]  = slot + ii + gnx*jj;
1485             }
1486           }
1487         }
1488         ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1489 	ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1490       }
1491     }
1492     ierr = PetscFree(values);CHKERRQ(ierr);
1493     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1494     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1495   }
1496   ierr = PetscFree(cols);CHKERRQ(ierr);
1497   PetscFunctionReturn(0);
1498 }
1499 
1500 #undef __FUNCT__
1501 #define __FUNCT__ "DMGetMatrix_DA_3d_MPISBAIJ"
1502 PetscErrorCode DMGetMatrix_DA_3d_MPISBAIJ(DM da,Mat J)
1503 {
1504   PetscErrorCode         ierr;
1505   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1506   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1507   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1508   MPI_Comm               comm;
1509   PetscScalar            *values;
1510   DMDAPeriodicType       wrap;
1511   DMDAStencilType        st;
1512   ISLocalToGlobalMapping ltog,ltogb;
1513   DM_DA                  *dd = (DM_DA*)da->data;
1514 
1515   PetscFunctionBegin;
1516   /*
1517      nc - number of components per grid point
1518      col - number of colors needed in one direction for single component problem
1519   */
1520   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&wrap,&st);CHKERRQ(ierr);
1521   col = 2*s + 1;
1522 
1523   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1524   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1525   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1526 
1527   /* create the matrix */
1528   ierr = PetscMalloc(col*col*col*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1529 
1530   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1531   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1532 
1533   /* determine the matrix preallocation information */
1534   ierr = MatPreallocateSymmetricInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1535   for (i=xs; i<xs+nx; i++) {
1536     istart = DMDAXPeriodic(wrap) ? -s : (PetscMax(-s,-i));
1537     iend   = DMDAXPeriodic(wrap) ?  s : (PetscMin(s,m-i-1));
1538     for (j=ys; j<ys+ny; j++) {
1539       jstart = DMDAYPeriodic(wrap) ? -s : (PetscMax(-s,-j));
1540       jend   = DMDAYPeriodic(wrap) ?  s : (PetscMin(s,n-j-1));
1541       for (k=zs; k<zs+nz; k++) {
1542         kstart = DMDAZPeriodic(wrap) ? -s : (PetscMax(-s,-k));
1543 	kend   = DMDAZPeriodic(wrap) ?  s : (PetscMin(s,p-k-1));
1544 
1545 	slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1546 
1547 	/* Find block columns in block row */
1548 	cnt  = 0;
1549         for (ii=istart; ii<iend+1; ii++) {
1550           for (jj=jstart; jj<jend+1; jj++) {
1551             for (kk=kstart; kk<kend+1; kk++) {
1552               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
1553                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1554               }
1555             }
1556           }
1557         }
1558         ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1559         ierr = MatPreallocateSymmetricSet(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
1560       }
1561     }
1562   }
1563   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1564   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1565   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1566 
1567   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1568   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1569 
1570   /*
1571     For each node in the grid: we get the neighbors in the local (on processor ordering
1572     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1573     PETSc ordering.
1574   */
1575   if (!dd->prealloc_only) {
1576     ierr = PetscMalloc(col*col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1577     ierr = PetscMemzero(values,col*col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1578     for (i=xs; i<xs+nx; i++) {
1579       istart = DMDAXPeriodic(wrap) ? -s : (PetscMax(-s,-i));
1580       iend   = DMDAXPeriodic(wrap) ?  s : (PetscMin(s,m-i-1));
1581       for (j=ys; j<ys+ny; j++) {
1582         jstart = DMDAYPeriodic(wrap) ? -s : (PetscMax(-s,-j));
1583 	jend   = DMDAYPeriodic(wrap) ?  s : (PetscMin(s,n-j-1));
1584 	for (k=zs; k<zs+nz; k++) {
1585           kstart = DMDAZPeriodic(wrap) ? -s : (PetscMax(-s,-k));
1586 	  kend   = DMDAZPeriodic(wrap) ?  s : (PetscMin(s,p-k-1));
1587 
1588 	  slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1589 
1590 	  cnt  = 0;
1591           for (ii=istart; ii<iend+1; ii++) {
1592             for (jj=jstart; jj<jend+1; jj++) {
1593               for (kk=kstart; kk<kend+1; kk++) {
1594                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
1595 		  cols[cnt++]  = slot + ii + gnx*jj + gnx*gny*kk;
1596 		}
1597 	      }
1598 	    }
1599 	  }
1600           ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1601           ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1602 	}
1603       }
1604     }
1605     ierr = PetscFree(values);CHKERRQ(ierr);
1606     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1607     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1608   }
1609   ierr = PetscFree(cols);CHKERRQ(ierr);
1610   PetscFunctionReturn(0);
1611 }
1612 
1613 /* ---------------------------------------------------------------------------------*/
1614 
1615 #undef __FUNCT__
1616 #define __FUNCT__ "DMGetMatrix_DA_3d_MPIAIJ_Fill"
1617 PetscErrorCode DMGetMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J)
1618 {
1619   PetscErrorCode         ierr;
1620   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1621   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,l,p,*dnz,*onz;
1622   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1623   DM_DA                  *dd = (DM_DA*)da->data;
1624   PetscInt               ifill_col,*dfill = dd->dfill,*ofill = dd->ofill;
1625   MPI_Comm               comm;
1626   PetscScalar            *values;
1627   DMDAPeriodicType       wrap;
1628   ISLocalToGlobalMapping ltog,ltogb;
1629   DMDAStencilType        st;
1630 
1631   PetscFunctionBegin;
1632   /*
1633          nc - number of components per grid point
1634          col - number of colors needed in one direction for single component problem
1635 
1636   */
1637   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&wrap,&st);CHKERRQ(ierr);
1638   col    = 2*s + 1;
1639   if (DMDAXPeriodic(wrap) && (m % col)){
1640     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
1641                  by 2*stencil_width + 1\n");
1642   }
1643   if (DMDAYPeriodic(wrap) && (n % col)){
1644     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
1645                  by 2*stencil_width + 1\n");
1646   }
1647   if (DMDAZPeriodic(wrap) && (p % col)){
1648     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
1649                  by 2*stencil_width + 1\n");
1650   }
1651 
1652   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1653   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1654   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1655 
1656   ierr = PetscMalloc(col*col*col*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1657   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1658   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1659 
1660   /* determine the matrix preallocation information */
1661   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1662 
1663 
1664   for (i=xs; i<xs+nx; i++) {
1665     istart = DMDAXPeriodic(wrap) ? -s : (PetscMax(-s,-i));
1666     iend   = DMDAXPeriodic(wrap) ?  s : (PetscMin(s,m-i-1));
1667     for (j=ys; j<ys+ny; j++) {
1668       jstart = DMDAYPeriodic(wrap) ? -s : (PetscMax(-s,-j));
1669       jend   = DMDAYPeriodic(wrap) ?  s : (PetscMin(s,n-j-1));
1670       for (k=zs; k<zs+nz; k++) {
1671         kstart = DMDAZPeriodic(wrap) ? -s : (PetscMax(-s,-k));
1672         kend   = DMDAZPeriodic(wrap) ?  s : (PetscMin(s,p-k-1));
1673 
1674         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1675 
1676 	for (l=0; l<nc; l++) {
1677 	  cnt  = 0;
1678 	  for (ii=istart; ii<iend+1; ii++) {
1679 	    for (jj=jstart; jj<jend+1; jj++) {
1680 	      for (kk=kstart; kk<kend+1; kk++) {
1681 		if (ii || jj || kk) {
1682 		  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1683 		    for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++)
1684 		      cols[cnt++]  = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1685 		  }
1686 		} else {
1687 		  if (dfill) {
1688 		    for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++)
1689 		      cols[cnt++]  = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1690 		  } else {
1691 		    for (ifill_col=0; ifill_col<nc; ifill_col++)
1692 		      cols[cnt++]  = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1693 		  }
1694 		}
1695 	      }
1696 	    }
1697 	  }
1698 	  row  = l + nc*(slot);
1699 	  ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1700 	}
1701       }
1702     }
1703   }
1704   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1705   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1706   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1707   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1708   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1709 
1710   /*
1711     For each node in the grid: we get the neighbors in the local (on processor ordering
1712     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1713     PETSc ordering.
1714   */
1715   if (!dd->prealloc_only) {
1716     ierr = PetscMalloc(col*col*col*nc*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1717     ierr = PetscMemzero(values,col*col*col*nc*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1718     for (i=xs; i<xs+nx; i++) {
1719       istart = DMDAXPeriodic(wrap) ? -s : (PetscMax(-s,-i));
1720       iend   = DMDAXPeriodic(wrap) ?  s : (PetscMin(s,m-i-1));
1721       for (j=ys; j<ys+ny; j++) {
1722 	jstart = DMDAYPeriodic(wrap) ? -s : (PetscMax(-s,-j));
1723 	jend   = DMDAYPeriodic(wrap) ?  s : (PetscMin(s,n-j-1));
1724 	for (k=zs; k<zs+nz; k++) {
1725 	  kstart = DMDAZPeriodic(wrap) ? -s : (PetscMax(-s,-k));
1726 	  kend   = DMDAZPeriodic(wrap) ?  s : (PetscMin(s,p-k-1));
1727 
1728 	  slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1729 
1730 	  for (l=0; l<nc; l++) {
1731 	    cnt  = 0;
1732 	    for (ii=istart; ii<iend+1; ii++) {
1733 	      for (jj=jstart; jj<jend+1; jj++) {
1734 		for (kk=kstart; kk<kend+1; kk++) {
1735 		  if (ii || jj || kk) {
1736 		    if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1737 		      for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++)
1738 			cols[cnt++]  = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1739 		    }
1740 		  } else {
1741 		    if (dfill) {
1742 		      for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++)
1743 			cols[cnt++]  = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1744 		    } else {
1745 		      for (ifill_col=0; ifill_col<nc; ifill_col++)
1746 			cols[cnt++]  = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1747 		    }
1748 		  }
1749 		}
1750 	      }
1751 	    }
1752 	    row  = l + nc*(slot);
1753 	    ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1754 	  }
1755 	}
1756       }
1757     }
1758     ierr = PetscFree(values);CHKERRQ(ierr);
1759     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1760     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1761   }
1762   ierr = PetscFree(cols);CHKERRQ(ierr);
1763   PetscFunctionReturn(0);
1764 }
1765