xref: /petsc/src/dm/impls/da/fdda.c (revision 9a42bb27a39f0cdf3306a1e22d33cd9809484eaa)
1 #define PETSCDM_DLL
2 
3 #include "private/daimpl.h" /*I      "petscda.h"     I*/
4 #include "petscmat.h"         /*I      "petscmat.h"    I*/
5 #include "private/matimpl.h"
6 
7 EXTERN PetscErrorCode DAGetColoring1d_MPIAIJ(DM,ISColoringType,ISColoring *);
8 EXTERN PetscErrorCode DAGetColoring2d_MPIAIJ(DM,ISColoringType,ISColoring *);
9 EXTERN PetscErrorCode DAGetColoring2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring *);
10 EXTERN PetscErrorCode DAGetColoring3d_MPIAIJ(DM,ISColoringType,ISColoring *);
11 
12 /*
13    For ghost i that may be negative or greater than the upper bound this
14   maps it into the 0:m-1 range using periodicity
15 */
16 #define SetInRange(i,m) ((i < 0) ? m+i:((i >= m) ? i-m:i))
17 
18 #undef __FUNCT__
19 #define __FUNCT__ "DASetBlockFills_Private"
20 static PetscErrorCode DASetBlockFills_Private(PetscInt *dfill,PetscInt w,PetscInt **rfill)
21 {
22   PetscErrorCode ierr;
23   PetscInt       i,j,nz,*fill;
24 
25   PetscFunctionBegin;
26   if (!dfill) PetscFunctionReturn(0);
27 
28   /* count number nonzeros */
29   nz = 0;
30   for (i=0; i<w; i++) {
31     for (j=0; j<w; j++) {
32       if (dfill[w*i+j]) nz++;
33     }
34   }
35   ierr = PetscMalloc((nz + w + 1)*sizeof(PetscInt),&fill);CHKERRQ(ierr);
36   /* construct modified CSR storage of nonzero structure */
37   nz = w + 1;
38   for (i=0; i<w; i++) {
39     fill[i] = nz;
40     for (j=0; j<w; j++) {
41       if (dfill[w*i+j]) {
42 	fill[nz] = j;
43 	nz++;
44       }
45     }
46   }
47   fill[w] = nz;
48 
49   *rfill = fill;
50   PetscFunctionReturn(0);
51 }
52 
53 #undef __FUNCT__
54 #define __FUNCT__ "DASetMatPreallocateOnly"
55 /*@
56     DASetMatPreallocateOnly - When DAGetMatrix() is called the matrix will be properly
57        preallocated but the nonzero structure and zero values will not be set.
58 
59     Logically Collective on DA
60 
61     Input Parameter:
62 +   da - the distributed array
63 -   only - PETSC_TRUE if only want preallocation
64 
65 
66     Level: developer
67 
68 .seealso DAGetMatrix(), DASetGetMatrix(), DASetBlockSize(), DASetBlockFills()
69 
70 @*/
71 PetscErrorCode PETSCDM_DLLEXPORT DASetMatPreallocateOnly(DM da,PetscBool  only)
72 {
73   DM_DA *dd = (DM_DA*)da->data;
74 
75   PetscFunctionBegin;
76   dd->prealloc_only = only;
77   PetscFunctionReturn(0);
78 }
79 
80 #undef __FUNCT__
81 #define __FUNCT__ "DASetBlockFills"
82 /*@
83     DASetBlockFills - Sets the fill pattern in each block for a multi-component problem
84     of the matrix returned by DAGetMatrix().
85 
86     Logically Collective on DA
87 
88     Input Parameter:
89 +   da - the distributed array
90 .   dfill - the fill pattern in the diagonal block (may be PETSC_NULL, means use dense block)
91 -   ofill - the fill pattern in the off-diagonal blocks
92 
93 
94     Level: developer
95 
96     Notes: This only makes sense when you are doing multicomponent problems but using the
97        MPIAIJ matrix format
98 
99            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
100        representing coupling and 0 entries for missing coupling. For example
101 $             dfill[9] = {1, 0, 0,
102 $                         1, 1, 0,
103 $                         0, 1, 1}
104        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
105        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
106        diagonal block).
107 
108      DASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
109      can be represented in the dfill, ofill format
110 
111    Contributed by Glenn Hammond
112 
113 .seealso DAGetMatrix(), DASetGetMatrix(), DASetMatPreallocateOnly()
114 
115 @*/
116 PetscErrorCode PETSCDM_DLLEXPORT DASetBlockFills(DM da,PetscInt *dfill,PetscInt *ofill)
117 {
118   DM_DA          *dd = (DM_DA*)da->data;
119   PetscErrorCode ierr;
120 
121   PetscFunctionBegin;
122   ierr = DASetBlockFills_Private(dfill,dd->w,&dd->dfill);CHKERRQ(ierr);
123   ierr = DASetBlockFills_Private(ofill,dd->w,&dd->ofill);CHKERRQ(ierr);
124   PetscFunctionReturn(0);
125 }
126 
127 
128 #undef __FUNCT__
129 #define __FUNCT__ "DAGetColoring"
130 /*@
131     DAGetColoring - Gets the coloring required for computing the Jacobian via
132     finite differences on a function defined using a stencil on the DA.
133 
134     Collective on DA
135 
136     Input Parameter:
137 +   da - the distributed array
138 .   ctype - IS_COLORING_GLOBAL or IS_COLORING_GHOSTED
139 -   mtype - either MATAIJ or MATBAIJ
140 
141     Output Parameters:
142 .   coloring - matrix coloring for use in computing Jacobians (or PETSC_NULL if not needed)
143 
144     Level: advanced
145 
146     Notes: These compute the graph coloring of the graph of A^{T}A. The coloring used
147    for efficient (parallel or thread based) triangular solves etc is NOT
148    available.
149 
150         For BAIJ matrices this colors the graph for the blocks, not for the individual matrix elements;
151     the same as MatGetColoring().
152 
153 .seealso ISColoringView(), ISColoringGetIS(), MatFDColoringCreate(), ISColoringType, ISColoring
154 
155 @*/
156 PetscErrorCode PETSCDM_DLLEXPORT DAGetColoring(DM da,ISColoringType ctype,const MatType mtype,ISColoring *coloring)
157 {
158   PetscErrorCode ierr;
159   PetscInt       dim,m,n,p,nc;
160   DAPeriodicType wrap;
161   MPI_Comm       comm;
162   PetscMPIInt    size;
163   PetscBool      isBAIJ;
164   DM_DA          *dd = (DM_DA*)da->data;
165 
166   PetscFunctionBegin;
167   /*
168                                   m
169           ------------------------------------------------------
170          |                                                     |
171          |                                                     |
172          |               ----------------------                |
173          |               |                    |                |
174       n  |           yn  |                    |                |
175          |               |                    |                |
176          |               .---------------------                |
177          |             (xs,ys)     xn                          |
178          |            .                                        |
179          |         (gxs,gys)                                   |
180          |                                                     |
181           -----------------------------------------------------
182   */
183 
184   /*
185          nc - number of components per grid point
186          col - number of colors needed in one direction for single component problem
187 
188   */
189   ierr = DAGetInfo(da,&dim,0,0,0,&m,&n,&p,&nc,0,&wrap,0);CHKERRQ(ierr);
190 
191   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
192   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
193   if (ctype == IS_COLORING_GHOSTED){
194     if (size == 1) {
195       ctype = IS_COLORING_GLOBAL;
196     } else if (dim > 1){
197       if ((m==1 && DAXPeriodic(wrap)) || (n==1 && DAYPeriodic(wrap)) || (p==1 && DAZPeriodic(wrap))){
198         SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"IS_COLORING_GHOSTED cannot be used for periodic boundary condition having both ends of the domain  on the same process");
199       }
200     }
201   }
202 
203   /* Tell the DA it has 1 degree of freedom per grid point so that the coloring for BAIJ
204      matrices is for the blocks, not the individual matrix elements  */
205   ierr = PetscStrcmp(mtype,MATBAIJ,&isBAIJ);CHKERRQ(ierr);
206   if (!isBAIJ) {ierr = PetscStrcmp(mtype,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);}
207   if (!isBAIJ) {ierr = PetscStrcmp(mtype,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);}
208   if (isBAIJ) {
209     dd->w = 1;
210     dd->xs = dd->xs/nc;
211     dd->xe = dd->xe/nc;
212     dd->Xs = dd->Xs/nc;
213     dd->Xe = dd->Xe/nc;
214   }
215 
216   /*
217      We do not provide a getcoloring function in the DA operations because
218    the basic DA does not know about matrices. We think of DA as being more
219    more low-level then matrices.
220   */
221   if (dim == 1) {
222     ierr = DAGetColoring1d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
223   } else if (dim == 2) {
224     ierr =  DAGetColoring2d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
225   } else if (dim == 3) {
226     ierr =  DAGetColoring3d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
227   } else {
228     SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
229   }
230   if (isBAIJ) {
231     dd->w = nc;
232     dd->xs = dd->xs*nc;
233     dd->xe = dd->xe*nc;
234     dd->Xs = dd->Xs*nc;
235     dd->Xe = dd->Xe*nc;
236   }
237   PetscFunctionReturn(0);
238 }
239 
240 /* ---------------------------------------------------------------------------------*/
241 
242 #undef __FUNCT__
243 #define __FUNCT__ "DAGetColoring2d_MPIAIJ"
244 PetscErrorCode DAGetColoring2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
245 {
246   PetscErrorCode         ierr;
247   PetscInt               xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
248   PetscInt               ncolors;
249   MPI_Comm               comm;
250   DAPeriodicType         wrap;
251   DAStencilType          st;
252   ISColoringValue        *colors;
253   DM_DA                  *dd = (DM_DA*)da->data;
254 
255   PetscFunctionBegin;
256   /*
257          nc - number of components per grid point
258          col - number of colors needed in one direction for single component problem
259 
260   */
261   ierr = DAGetInfo(da,&dim,&m,&n,0,&M,&N,0,&nc,&s,&wrap,&st);CHKERRQ(ierr);
262   col    = 2*s + 1;
263   ierr = DAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
264   ierr = DAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
265   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
266 
267   /* special case as taught to us by Paul Hovland */
268   if (st == DA_STENCIL_STAR && s == 1) {
269     ierr = DAGetColoring2d_5pt_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
270   } else {
271 
272     if (DAXPeriodic(wrap) && (m % col)){
273       SETERRQ2(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X (%d) is divisible\n\
274                  by 2*stencil_width + 1 (%d)\n", m, col);
275     }
276     if (DAYPeriodic(wrap) && (n % col)){
277       SETERRQ2(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y (%d) is divisible\n\
278                  by 2*stencil_width + 1 (%d)\n", n, col);
279     }
280     if (ctype == IS_COLORING_GLOBAL) {
281       if (!dd->localcoloring) {
282 	ierr = PetscMalloc(nc*nx*ny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
283 	ii = 0;
284 	for (j=ys; j<ys+ny; j++) {
285 	  for (i=xs; i<xs+nx; i++) {
286 	    for (k=0; k<nc; k++) {
287 	      colors[ii++] = k + nc*((i % col) + col*(j % col));
288 	    }
289 	  }
290 	}
291         ncolors = nc + nc*(col-1 + col*(col-1));
292 	ierr = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,&dd->localcoloring);CHKERRQ(ierr);
293       }
294       *coloring = dd->localcoloring;
295     } else if (ctype == IS_COLORING_GHOSTED) {
296       if (!dd->ghostedcoloring) {
297 	ierr = PetscMalloc(nc*gnx*gny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
298 	ii = 0;
299 	for (j=gys; j<gys+gny; j++) {
300 	  for (i=gxs; i<gxs+gnx; i++) {
301 	    for (k=0; k<nc; k++) {
302 	      /* the complicated stuff is to handle periodic boundaries */
303 	      colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
304 	    }
305 	  }
306 	}
307         ncolors = nc + nc*(col - 1 + col*(col-1));
308 	ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
309         /* PetscIntView(ncolors,(PetscInt *)colors,0); */
310 
311 	ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
312       }
313       *coloring = dd->ghostedcoloring;
314     } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
315   }
316   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
317   PetscFunctionReturn(0);
318 }
319 
320 /* ---------------------------------------------------------------------------------*/
321 
322 #undef __FUNCT__
323 #define __FUNCT__ "DAGetColoring3d_MPIAIJ"
324 PetscErrorCode DAGetColoring3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
325 {
326   PetscErrorCode  ierr;
327   PetscInt        xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
328   PetscInt        ncolors;
329   MPI_Comm        comm;
330   DAPeriodicType  wrap;
331   DAStencilType   st;
332   ISColoringValue *colors;
333   DM_DA           *dd = (DM_DA*)da->data;
334 
335   PetscFunctionBegin;
336   /*
337          nc - number of components per grid point
338          col - number of colors needed in one direction for single component problem
339 
340   */
341   ierr = DAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&wrap,&st);CHKERRQ(ierr);
342   col    = 2*s + 1;
343   if (DAXPeriodic(wrap) && (m % col)){
344     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
345                  by 2*stencil_width + 1\n");
346   }
347   if (DAYPeriodic(wrap) && (n % col)){
348     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
349                  by 2*stencil_width + 1\n");
350   }
351   if (DAZPeriodic(wrap) && (p % col)){
352     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
353                  by 2*stencil_width + 1\n");
354   }
355 
356   ierr = DAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
357   ierr = DAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
358   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
359 
360   /* create the coloring */
361   if (ctype == IS_COLORING_GLOBAL) {
362     if (!dd->localcoloring) {
363       ierr = PetscMalloc(nc*nx*ny*nz*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
364       ii = 0;
365       for (k=zs; k<zs+nz; k++) {
366         for (j=ys; j<ys+ny; j++) {
367           for (i=xs; i<xs+nx; i++) {
368             for (l=0; l<nc; l++) {
369               colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
370             }
371           }
372         }
373       }
374       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
375       ierr = ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,&dd->localcoloring);CHKERRQ(ierr);
376     }
377     *coloring = dd->localcoloring;
378   } else if (ctype == IS_COLORING_GHOSTED) {
379     if (!dd->ghostedcoloring) {
380       ierr = PetscMalloc(nc*gnx*gny*gnz*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
381       ii = 0;
382       for (k=gzs; k<gzs+gnz; k++) {
383         for (j=gys; j<gys+gny; j++) {
384           for (i=gxs; i<gxs+gnx; i++) {
385             for (l=0; l<nc; l++) {
386               /* the complicated stuff is to handle periodic boundaries */
387               colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
388             }
389           }
390         }
391       }
392       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
393       ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
394       ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
395     }
396     *coloring = dd->ghostedcoloring;
397   } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
398   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
399   PetscFunctionReturn(0);
400 }
401 
402 /* ---------------------------------------------------------------------------------*/
403 
404 #undef __FUNCT__
405 #define __FUNCT__ "DAGetColoring1d_MPIAIJ"
406 PetscErrorCode DAGetColoring1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
407 {
408   PetscErrorCode  ierr;
409   PetscInt        xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
410   PetscInt        ncolors;
411   MPI_Comm        comm;
412   DAPeriodicType  wrap;
413   ISColoringValue *colors;
414   DM_DA           *dd = (DM_DA*)da->data;
415 
416   PetscFunctionBegin;
417   /*
418          nc - number of components per grid point
419          col - number of colors needed in one direction for single component problem
420 
421   */
422   ierr = DAGetInfo(da,&dim,&m,0,0,&M,0,0,&nc,&s,&wrap,0);CHKERRQ(ierr);
423   col    = 2*s + 1;
424 
425   if (DAXPeriodic(wrap) && (m % col)) {
426     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points is divisible\n\
427                  by 2*stencil_width + 1\n");
428   }
429 
430   ierr = DAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
431   ierr = DAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
432   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
433 
434   /* create the coloring */
435   if (ctype == IS_COLORING_GLOBAL) {
436     if (!dd->localcoloring) {
437       ierr = PetscMalloc(nc*nx*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
438       i1 = 0;
439       for (i=xs; i<xs+nx; i++) {
440         for (l=0; l<nc; l++) {
441           colors[i1++] = l + nc*(i % col);
442         }
443       }
444       ncolors = nc + nc*(col-1);
445       ierr = ISColoringCreate(comm,ncolors,nc*nx,colors,&dd->localcoloring);CHKERRQ(ierr);
446     }
447     *coloring = dd->localcoloring;
448   } else if (ctype == IS_COLORING_GHOSTED) {
449     if (!dd->ghostedcoloring) {
450       ierr = PetscMalloc(nc*gnx*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
451       i1 = 0;
452       for (i=gxs; i<gxs+gnx; i++) {
453         for (l=0; l<nc; l++) {
454           /* the complicated stuff is to handle periodic boundaries */
455           colors[i1++] = l + nc*(SetInRange(i,m) % col);
456         }
457       }
458       ncolors = nc + nc*(col-1);
459       ierr = ISColoringCreate(comm,ncolors,nc*gnx,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
460       ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
461     }
462     *coloring = dd->ghostedcoloring;
463   } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
464   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
465   PetscFunctionReturn(0);
466 }
467 
468 #undef __FUNCT__
469 #define __FUNCT__ "DAGetColoring2d_5pt_MPIAIJ"
470 PetscErrorCode DAGetColoring2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
471 {
472   PetscErrorCode  ierr;
473   PetscInt        xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
474   PetscInt        ncolors;
475   MPI_Comm        comm;
476   DAPeriodicType  wrap;
477   ISColoringValue *colors;
478   DM_DA           *dd = (DM_DA*)da->data;
479 
480   PetscFunctionBegin;
481   /*
482          nc - number of components per grid point
483          col - number of colors needed in one direction for single component problem
484 
485   */
486   ierr   = DAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&wrap,0);CHKERRQ(ierr);
487   ierr   = DAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
488   ierr   = DAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
489   ierr   = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
490 
491   if (DAXPeriodic(wrap) && (m % 5)){
492     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
493                  by 5\n");
494   }
495   if (DAYPeriodic(wrap) && (n % 5)){
496     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
497                  by 5\n");
498   }
499 
500   /* create the coloring */
501   if (ctype == IS_COLORING_GLOBAL) {
502     if (!dd->localcoloring) {
503       ierr = PetscMalloc(nc*nx*ny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
504       ii = 0;
505       for (j=ys; j<ys+ny; j++) {
506 	for (i=xs; i<xs+nx; i++) {
507 	  for (k=0; k<nc; k++) {
508 	    colors[ii++] = k + nc*((3*j+i) % 5);
509 	  }
510 	}
511       }
512       ncolors = 5*nc;
513       ierr = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,&dd->localcoloring);CHKERRQ(ierr);
514     }
515     *coloring = dd->localcoloring;
516   } else if (ctype == IS_COLORING_GHOSTED) {
517     if (!dd->ghostedcoloring) {
518       ierr = PetscMalloc(nc*gnx*gny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
519       ii = 0;
520       for (j=gys; j<gys+gny; j++) {
521 	for (i=gxs; i<gxs+gnx; i++) {
522 	  for (k=0; k<nc; k++) {
523 	    colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
524 	  }
525 	}
526       }
527       ncolors = 5*nc;
528       ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
529       ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
530     }
531     *coloring = dd->ghostedcoloring;
532   } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
533   PetscFunctionReturn(0);
534 }
535 
536 /* =========================================================================== */
537 EXTERN PetscErrorCode DAGetMatrix1d_MPIAIJ(DM,Mat);
538 EXTERN PetscErrorCode DAGetMatrix2d_MPIAIJ(DM,Mat);
539 EXTERN PetscErrorCode DAGetMatrix2d_MPIAIJ_Fill(DM,Mat);
540 EXTERN PetscErrorCode DAGetMatrix3d_MPIAIJ(DM,Mat);
541 EXTERN PetscErrorCode DAGetMatrix3d_MPIAIJ_Fill(DM,Mat);
542 EXTERN PetscErrorCode DAGetMatrix2d_MPIBAIJ(DM,Mat);
543 EXTERN PetscErrorCode DAGetMatrix3d_MPIBAIJ(DM,Mat);
544 EXTERN PetscErrorCode DAGetMatrix2d_MPISBAIJ(DM,Mat);
545 EXTERN PetscErrorCode DAGetMatrix3d_MPISBAIJ(DM,Mat);
546 
547 #undef __FUNCT__
548 #define __FUNCT__ "MatSetDA"
549 /*@
550    MatSetDA - Sets the DA that is to be used by the HYPRE_StructMatrix PETSc matrix
551 
552    Logically Collective on Mat
553 
554    Input Parameters:
555 +  mat - the matrix
556 -  da - the da
557 
558    Level: intermediate
559 
560 @*/
561 PetscErrorCode PETSCDM_DLLEXPORT MatSetDA(Mat mat,DM da)
562 {
563   PetscErrorCode ierr;
564 
565   PetscFunctionBegin;
566   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
567   PetscValidHeaderSpecific(da,DM_CLASSID,1);
568   ierr = PetscTryMethod(mat,"MatSetDA_C",(Mat,DM),(mat,da));CHKERRQ(ierr);
569   PetscFunctionReturn(0);
570 }
571 
572 EXTERN_C_BEGIN
573 #undef __FUNCT__
574 #define __FUNCT__ "MatView_MPI_DA"
575 PetscErrorCode PETSCDM_DLLEXPORT MatView_MPI_DA(Mat A,PetscViewer viewer)
576 {
577   DM             da;
578   PetscErrorCode ierr;
579   const char     *prefix;
580   Mat            Anatural;
581   AO             ao;
582   PetscInt       rstart,rend,*petsc,i;
583   IS             is;
584   MPI_Comm       comm;
585 
586   PetscFunctionBegin;
587   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
588   ierr = PetscObjectQuery((PetscObject)A,"DA",(PetscObject*)&da);CHKERRQ(ierr);
589   if (!da) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Matrix not generated from a DA");
590 
591   ierr = DAGetAO(da,&ao);CHKERRQ(ierr);
592   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
593   ierr = PetscMalloc((rend-rstart)*sizeof(PetscInt),&petsc);CHKERRQ(ierr);
594   for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
595   ierr = AOApplicationToPetsc(ao,rend-rstart,petsc);CHKERRQ(ierr);
596   ierr = ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
597 
598   /* call viewer on natural ordering */
599   ierr = MatGetSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);CHKERRQ(ierr);
600   ierr = ISDestroy(is);CHKERRQ(ierr);
601   ierr = PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);CHKERRQ(ierr);
602   ierr = PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);CHKERRQ(ierr);
603   ierr = PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);CHKERRQ(ierr);
604   ierr = MatView(Anatural,viewer);CHKERRQ(ierr);
605   ierr = MatDestroy(Anatural);CHKERRQ(ierr);
606   PetscFunctionReturn(0);
607 }
608 EXTERN_C_END
609 
610 EXTERN_C_BEGIN
611 #undef __FUNCT__
612 #define __FUNCT__ "MatLoad_MPI_DA"
613 PetscErrorCode PETSCDM_DLLEXPORT MatLoad_MPI_DA(Mat A,PetscViewer viewer)
614 {
615   DM             da;
616   PetscErrorCode ierr;
617   Mat            Anatural,Aapp;
618   AO             ao;
619   PetscInt       rstart,rend,*app,i;
620   IS             is;
621   MPI_Comm       comm;
622 
623   PetscFunctionBegin;
624   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
625   ierr = PetscObjectQuery((PetscObject)A,"DA",(PetscObject*)&da);CHKERRQ(ierr);
626   if (!da) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Matrix not generated from a DA");
627 
628   /* Load the matrix in natural ordering */
629   ierr = MatCreate(((PetscObject)A)->comm,&Anatural);CHKERRQ(ierr);
630   ierr = MatSetType(Anatural,((PetscObject)A)->type_name);CHKERRQ(ierr);
631   ierr = MatSetSizes(Anatural,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
632   ierr = MatLoad(Anatural,viewer);CHKERRQ(ierr);
633 
634   /* Map natural ordering to application ordering and create IS */
635   ierr = DAGetAO(da,&ao);CHKERRQ(ierr);
636   ierr = MatGetOwnershipRange(Anatural,&rstart,&rend);CHKERRQ(ierr);
637   ierr = PetscMalloc((rend-rstart)*sizeof(PetscInt),&app);CHKERRQ(ierr);
638   for (i=rstart; i<rend; i++) app[i-rstart] = i;
639   ierr = AOPetscToApplication(ao,rend-rstart,app);CHKERRQ(ierr);
640   ierr = ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
641 
642   /* Do permutation and replace header */
643   ierr = MatGetSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);CHKERRQ(ierr);
644   ierr = MatHeaderReplace(A,Aapp);CHKERRQ(ierr);
645   ierr = ISDestroy(is);CHKERRQ(ierr);
646   ierr = MatDestroy(Anatural);CHKERRQ(ierr);
647   PetscFunctionReturn(0);
648 }
649 EXTERN_C_END
650 
651 
652 #undef __FUNCT__
653 #define __FUNCT__ "DAGetMatrix"
654 /*@C
655     DAGetMatrix - Creates a matrix with the correct parallel layout and nonzero structure required for
656       computing the Jacobian on a function defined using the stencil set in the DA.
657 
658     Collective on DA
659 
660     Input Parameter:
661 +   da - the distributed array
662 -   mtype - Supported types are MATSEQAIJ, MATMPIAIJ, MATSEQBAIJ, MATMPIBAIJ, MATSEQSBAIJ, MATMPISBAIJ,
663             or any type which inherits from one of these (such as MATAIJ, MATBAIJ, MATSBAIJ)
664 
665     Output Parameters:
666 .   J  - matrix with the correct nonzero structure
667         (obviously without the correct Jacobian values)
668 
669     Level: advanced
670 
671     Notes: This properly preallocates the number of nonzeros in the sparse matrix so you
672        do not need to do it yourself.
673 
674        By default it also sets the nonzero structure and puts in the zero entries. To prevent setting
675        the nonzero pattern call DASetMatPreallocateOnly()
676 
677        When you call MatView() on this matrix it is displayed using the global natural ordering, NOT in the ordering used
678        internally by PETSc.
679 
680        In general it is easiest to use MatSetValuesStencil() or MatSetValuesLocal() to put values into the matrix because MatSetValues() requires
681        the indices for the global numbering for DAs which is complicated.
682 
683 .seealso ISColoringView(), ISColoringGetIS(), MatFDColoringCreate(), DASetBlockFills(), DASetMatPreallocateOnly()
684 
685 @*/
686 PetscErrorCode PETSCDM_DLLEXPORT DAGetMatrix(DM da, const MatType mtype,Mat *J)
687 {
688   PetscErrorCode ierr;
689   PetscInt       dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
690   Mat            A;
691   MPI_Comm       comm;
692   const MatType  Atype;
693   void           (*aij)(void)=PETSC_NULL,(*baij)(void)=PETSC_NULL,(*sbaij)(void)=PETSC_NULL;
694   MatType        ttype[256];
695   PetscBool      flg;
696   PetscMPIInt    size;
697   DM_DA          *dd = (DM_DA*)da->data;
698 
699   PetscFunctionBegin;
700 #ifndef PETSC_USE_DYNAMIC_LIBRARIES
701   ierr = MatInitializePackage(PETSC_NULL);CHKERRQ(ierr);
702 #endif
703   ierr = PetscStrcpy((char*)ttype,mtype);CHKERRQ(ierr);
704   ierr = PetscOptionsBegin(((PetscObject)da)->comm,((PetscObject)da)->prefix,"DA options","Mat");CHKERRQ(ierr);
705   ierr = PetscOptionsList("-da_mat_type","Matrix type","MatSetType",MatList,mtype,(char*)ttype,256,&flg);CHKERRQ(ierr);
706   ierr = PetscOptionsEnd();
707 
708   /*
709                                   m
710           ------------------------------------------------------
711          |                                                     |
712          |                                                     |
713          |               ----------------------                |
714          |               |                    |                |
715       n  |           ny  |                    |                |
716          |               |                    |                |
717          |               .---------------------                |
718          |             (xs,ys)     nx                          |
719          |            .                                        |
720          |         (gxs,gys)                                   |
721          |                                                     |
722           -----------------------------------------------------
723   */
724 
725   /*
726          nc - number of components per grid point
727          col - number of colors needed in one direction for single component problem
728 
729   */
730   ierr = DAGetInfo(da,&dim,&M,&N,&P,0,0,0,&dof,0,0,0);CHKERRQ(ierr);
731   ierr = DAGetCorners(da,0,0,0,&nx,&ny,&nz);CHKERRQ(ierr);
732   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
733   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
734   ierr = MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);CHKERRQ(ierr);
735   ierr = MatSetType(A,(const MatType)ttype);CHKERRQ(ierr);
736   ierr = MatSetDA(A,da);CHKERRQ(ierr);
737   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
738   ierr = MatGetType(A,&Atype);CHKERRQ(ierr);
739   /*
740      We do not provide a getmatrix function in the DA operations because
741    the basic DA does not know about matrices. We think of DA as being more
742    more low-level than matrices. This is kind of cheating but, cause sometimes
743    we think of DA has higher level than matrices.
744 
745      We could switch based on Atype (or mtype), but we do not since the
746    specialized setting routines depend only the particular preallocation
747    details of the matrix, not the type itself.
748   */
749   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
750   if (!aij) {
751     ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
752   }
753   if (!aij) {
754     ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
755     if (!baij) {
756       ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
757     }
758     if (!baij){
759       ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
760       if (!sbaij) {
761         ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
762       }
763     }
764   }
765   if (aij) {
766     if (dim == 1) {
767       ierr = DAGetMatrix1d_MPIAIJ(da,A);CHKERRQ(ierr);
768     } else if (dim == 2) {
769       if (dd->ofill) {
770         ierr = DAGetMatrix2d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
771       } else {
772         ierr = DAGetMatrix2d_MPIAIJ(da,A);CHKERRQ(ierr);
773       }
774     } else if (dim == 3) {
775       if (dd->ofill) {
776         ierr = DAGetMatrix3d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
777       } else {
778         ierr = DAGetMatrix3d_MPIAIJ(da,A);CHKERRQ(ierr);
779       }
780     }
781   } else if (baij) {
782     if (dim == 2) {
783       ierr = DAGetMatrix2d_MPIBAIJ(da,A);CHKERRQ(ierr);
784     } else if (dim == 3) {
785       ierr = DAGetMatrix3d_MPIBAIJ(da,A);CHKERRQ(ierr);
786     } else {
787       SETERRQ2(((PetscObject)da)->comm,PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension!\n" \
788 	       "Send mail to petsc-maint@mcs.anl.gov for code",Atype,dim);
789     }
790   } else if (sbaij) {
791     if (dim == 2) {
792       ierr = DAGetMatrix2d_MPISBAIJ(da,A);CHKERRQ(ierr);
793     } else if (dim == 3) {
794       ierr = DAGetMatrix3d_MPISBAIJ(da,A);CHKERRQ(ierr);
795     } else {
796       SETERRQ2(((PetscObject)da)->comm,PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension!\n" \
797 	       "Send mail to petsc-maint@mcs.anl.gov for code",Atype,dim);
798     }
799   }
800   ierr = DAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);CHKERRQ(ierr);
801   ierr = MatSetStencil(A,dim,dims,starts,dof);CHKERRQ(ierr);
802   ierr = PetscObjectCompose((PetscObject)A,"DA",(PetscObject)da);CHKERRQ(ierr);
803   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
804   if (size > 1) {
805     /* change viewer to display matrix in natural ordering */
806     ierr = MatShellSetOperation(A, MATOP_VIEW, (void (*)(void)) MatView_MPI_DA);CHKERRQ(ierr);
807     ierr = MatShellSetOperation(A, MATOP_LOAD, (void (*)(void)) MatLoad_MPI_DA);CHKERRQ(ierr);
808   }
809   *J = A;
810   PetscFunctionReturn(0);
811 }
812 
813 /* ---------------------------------------------------------------------------------*/
814 #undef __FUNCT__
815 #define __FUNCT__ "DAGetMatrix2d_MPIAIJ"
816 PetscErrorCode DAGetMatrix2d_MPIAIJ(DM da,Mat J)
817 {
818   PetscErrorCode         ierr;
819   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = PETSC_NULL,k,nc,*rows = PETSC_NULL,col,cnt,l,p;
820   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
821   MPI_Comm               comm;
822   PetscScalar            *values;
823   DAPeriodicType         wrap;
824   ISLocalToGlobalMapping ltog,ltogb;
825   DAStencilType          st;
826   DM_DA                  *dd = (DM_DA*)da->data;
827 
828   PetscFunctionBegin;
829   /*
830          nc - number of components per grid point
831          col - number of colors needed in one direction for single component problem
832 
833   */
834   ierr = DAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&wrap,&st);CHKERRQ(ierr);
835   col = 2*s + 1;
836   ierr = DAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
837   ierr = DAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
838   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
839 
840   ierr = PetscMalloc2(nc,PetscInt,&rows,col*col*nc*nc,PetscInt,&cols);CHKERRQ(ierr);
841   ierr = DAGetISLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
842   ierr = DAGetISLocalToGlobalMappingBlck(da,&ltogb);CHKERRQ(ierr);
843 
844   /* determine the matrix preallocation information */
845   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
846   for (i=xs; i<xs+nx; i++) {
847 
848     pstart = DAXPeriodic(wrap) ? -s : (PetscMax(-s,-i));
849     pend   = DAXPeriodic(wrap) ?  s : (PetscMin(s,m-i-1));
850 
851     for (j=ys; j<ys+ny; j++) {
852       slot = i - gxs + gnx*(j - gys);
853 
854       lstart = DAYPeriodic(wrap) ? -s : (PetscMax(-s,-j));
855       lend   = DAYPeriodic(wrap) ?  s : (PetscMin(s,n-j-1));
856 
857       cnt  = 0;
858       for (k=0; k<nc; k++) {
859 	for (l=lstart; l<lend+1; l++) {
860 	  for (p=pstart; p<pend+1; p++) {
861 	    if ((st == DA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
862 	      cols[cnt++]  = k + nc*(slot + gnx*l + p);
863 	    }
864 	  }
865 	}
866 	rows[k] = k + nc*(slot);
867       }
868       ierr = MatPreallocateSetLocal(ltog,nc,rows,cnt,cols,dnz,onz);CHKERRQ(ierr);
869     }
870   }
871   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
872   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
873   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
874   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
875 
876   ierr = MatSetLocalToGlobalMapping(J,ltog);CHKERRQ(ierr);
877   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb);CHKERRQ(ierr);
878 
879   /*
880     For each node in the grid: we get the neighbors in the local (on processor ordering
881     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
882     PETSc ordering.
883   */
884   if (!dd->prealloc_only) {
885     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
886     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
887     for (i=xs; i<xs+nx; i++) {
888 
889       pstart = DAXPeriodic(wrap) ? -s : (PetscMax(-s,-i));
890       pend   = DAXPeriodic(wrap) ?  s : (PetscMin(s,m-i-1));
891 
892       for (j=ys; j<ys+ny; j++) {
893 	slot = i - gxs + gnx*(j - gys);
894 
895 	lstart = DAYPeriodic(wrap) ? -s : (PetscMax(-s,-j));
896 	lend   = DAYPeriodic(wrap) ?  s : (PetscMin(s,n-j-1));
897 
898 	cnt  = 0;
899 	for (k=0; k<nc; k++) {
900 	  for (l=lstart; l<lend+1; l++) {
901 	    for (p=pstart; p<pend+1; p++) {
902 	      if ((st == DA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
903 		cols[cnt++]  = k + nc*(slot + gnx*l + p);
904 	      }
905 	    }
906 	  }
907 	  rows[k]      = k + nc*(slot);
908 	}
909 	ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
910       }
911     }
912     ierr = PetscFree(values);CHKERRQ(ierr);
913     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
914     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
915   }
916   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
917   PetscFunctionReturn(0);
918 }
919 
920 #undef __FUNCT__
921 #define __FUNCT__ "DAGetMatrix2d_MPIAIJ_Fill"
922 PetscErrorCode DAGetMatrix2d_MPIAIJ_Fill(DM da,Mat J)
923 {
924   PetscErrorCode         ierr;
925   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
926   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,l,p;
927   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
928   DM_DA                  *dd = (DM_DA*)da->data;
929   PetscInt               ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
930   MPI_Comm               comm;
931   PetscScalar            *values;
932   DAPeriodicType         wrap;
933   ISLocalToGlobalMapping ltog,ltogb;
934   DAStencilType          st;
935 
936   PetscFunctionBegin;
937   /*
938          nc - number of components per grid point
939          col - number of colors needed in one direction for single component problem
940 
941   */
942   ierr = DAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&wrap,&st);CHKERRQ(ierr);
943   col = 2*s + 1;
944   ierr = DAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
945   ierr = DAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
946   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
947 
948   ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
949   ierr = DAGetISLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
950   ierr = DAGetISLocalToGlobalMappingBlck(da,&ltogb);CHKERRQ(ierr);
951 
952   /* determine the matrix preallocation information */
953   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
954   for (i=xs; i<xs+nx; i++) {
955 
956     pstart = DAXPeriodic(wrap) ? -s : (PetscMax(-s,-i));
957     pend   = DAXPeriodic(wrap) ?  s : (PetscMin(s,m-i-1));
958 
959     for (j=ys; j<ys+ny; j++) {
960       slot = i - gxs + gnx*(j - gys);
961 
962       lstart = DAYPeriodic(wrap) ? -s : (PetscMax(-s,-j));
963       lend   = DAYPeriodic(wrap) ?  s : (PetscMin(s,n-j-1));
964 
965       for (k=0; k<nc; k++) {
966         cnt  = 0;
967 	for (l=lstart; l<lend+1; l++) {
968 	  for (p=pstart; p<pend+1; p++) {
969             if (l || p) {
970 	      if ((st == DA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
971                 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++)
972 		  cols[cnt++]  = ofill[ifill_col] + nc*(slot + gnx*l + p);
973 	      }
974             } else {
975 	      if (dfill) {
976 		for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++)
977 		  cols[cnt++]  = dfill[ifill_col] + nc*(slot + gnx*l + p);
978 	      } else {
979 		for (ifill_col=0; ifill_col<nc; ifill_col++)
980 		  cols[cnt++]  = ifill_col + nc*(slot + gnx*l + p);
981 	      }
982             }
983 	  }
984 	}
985 	row = k + nc*(slot);
986         ierr = MatPreallocateSetLocal(ltog,1,&row,cnt,cols,dnz,onz);CHKERRQ(ierr);
987       }
988     }
989   }
990   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
991   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
992   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
993   ierr = MatSetLocalToGlobalMapping(J,ltog);CHKERRQ(ierr);
994   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb);CHKERRQ(ierr);
995 
996   /*
997     For each node in the grid: we get the neighbors in the local (on processor ordering
998     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
999     PETSc ordering.
1000   */
1001   if (!dd->prealloc_only) {
1002     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1003     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1004     for (i=xs; i<xs+nx; i++) {
1005 
1006       pstart = DAXPeriodic(wrap) ? -s : (PetscMax(-s,-i));
1007       pend   = DAXPeriodic(wrap) ?  s : (PetscMin(s,m-i-1));
1008 
1009       for (j=ys; j<ys+ny; j++) {
1010 	slot = i - gxs + gnx*(j - gys);
1011 
1012 	lstart = DAYPeriodic(wrap) ? -s : (PetscMax(-s,-j));
1013 	lend   = DAYPeriodic(wrap) ?  s : (PetscMin(s,n-j-1));
1014 
1015 	for (k=0; k<nc; k++) {
1016 	  cnt  = 0;
1017 	  for (l=lstart; l<lend+1; l++) {
1018 	    for (p=pstart; p<pend+1; p++) {
1019 	      if (l || p) {
1020 		if ((st == DA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
1021 		  for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++)
1022 		    cols[cnt++]  = ofill[ifill_col] + nc*(slot + gnx*l + p);
1023 		}
1024 	      } else {
1025 		if (dfill) {
1026 		  for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++)
1027 		    cols[cnt++]  = dfill[ifill_col] + nc*(slot + gnx*l + p);
1028 		} else {
1029 		  for (ifill_col=0; ifill_col<nc; ifill_col++)
1030 		    cols[cnt++]  = ifill_col + nc*(slot + gnx*l + p);
1031 		}
1032 	      }
1033 	    }
1034 	  }
1035 	  row  = k + nc*(slot);
1036 	  ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1037 	}
1038       }
1039     }
1040     ierr = PetscFree(values);CHKERRQ(ierr);
1041     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1042     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1043   }
1044   ierr = PetscFree(cols);CHKERRQ(ierr);
1045   PetscFunctionReturn(0);
1046 }
1047 
1048 /* ---------------------------------------------------------------------------------*/
1049 
1050 #undef __FUNCT__
1051 #define __FUNCT__ "DAGetMatrix3d_MPIAIJ"
1052 PetscErrorCode DAGetMatrix3d_MPIAIJ(DM da,Mat J)
1053 {
1054   PetscErrorCode         ierr;
1055   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1056   PetscInt               m,n,dim,s,*cols = PETSC_NULL,k,nc,*rows = PETSC_NULL,col,cnt,l,p,*dnz = PETSC_NULL,*onz = PETSC_NULL;
1057   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1058   MPI_Comm               comm;
1059   PetscScalar            *values;
1060   DAPeriodicType         wrap;
1061   ISLocalToGlobalMapping ltog,ltogb;
1062   DAStencilType          st;
1063   DM_DA                  *dd = (DM_DA*)da->data;
1064 
1065   PetscFunctionBegin;
1066   /*
1067          nc - number of components per grid point
1068          col - number of colors needed in one direction for single component problem
1069 
1070   */
1071   ierr = DAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&wrap,&st);CHKERRQ(ierr);
1072   col    = 2*s + 1;
1073 
1074   ierr = DAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1075   ierr = DAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1076   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1077 
1078   ierr = PetscMalloc2(nc,PetscInt,&rows,col*col*col*nc*nc,PetscInt,&cols);CHKERRQ(ierr);
1079   ierr = DAGetISLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1080   ierr = DAGetISLocalToGlobalMappingBlck(da,&ltogb);CHKERRQ(ierr);
1081 
1082   /* determine the matrix preallocation information */
1083   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1084   for (i=xs; i<xs+nx; i++) {
1085     istart = DAXPeriodic(wrap) ? -s : (PetscMax(-s,-i));
1086     iend   = DAXPeriodic(wrap) ?  s : (PetscMin(s,m-i-1));
1087     for (j=ys; j<ys+ny; j++) {
1088       jstart = DAYPeriodic(wrap) ? -s : (PetscMax(-s,-j));
1089       jend   = DAYPeriodic(wrap) ?  s : (PetscMin(s,n-j-1));
1090       for (k=zs; k<zs+nz; k++) {
1091 	kstart = DAZPeriodic(wrap) ? -s : (PetscMax(-s,-k));
1092 	kend   = DAZPeriodic(wrap) ?  s : (PetscMin(s,p-k-1));
1093 
1094 	slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1095 
1096 	cnt  = 0;
1097 	for (l=0; l<nc; l++) {
1098 	  for (ii=istart; ii<iend+1; ii++) {
1099 	    for (jj=jstart; jj<jend+1; jj++) {
1100 	      for (kk=kstart; kk<kend+1; kk++) {
1101 		if ((st == DA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1102 		  cols[cnt++]  = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1103 		}
1104 	      }
1105 	    }
1106 	  }
1107 	  rows[l] = l + nc*(slot);
1108 	}
1109 	ierr = MatPreallocateSetLocal(ltog,nc,rows,cnt,cols,dnz,onz);CHKERRQ(ierr);
1110       }
1111     }
1112   }
1113   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1114   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1115   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1116   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1117   ierr = MatSetLocalToGlobalMapping(J,ltog);CHKERRQ(ierr);
1118   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb);CHKERRQ(ierr);
1119 
1120   /*
1121     For each node in the grid: we get the neighbors in the local (on processor ordering
1122     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1123     PETSc ordering.
1124   */
1125   if (!dd->prealloc_only) {
1126     ierr = PetscMalloc(col*col*col*nc*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1127     ierr = PetscMemzero(values,col*col*col*nc*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1128     for (i=xs; i<xs+nx; i++) {
1129       istart = DAXPeriodic(wrap) ? -s : (PetscMax(-s,-i));
1130       iend   = DAXPeriodic(wrap) ?  s : (PetscMin(s,m-i-1));
1131       for (j=ys; j<ys+ny; j++) {
1132 	jstart = DAYPeriodic(wrap) ? -s : (PetscMax(-s,-j));
1133 	jend   = DAYPeriodic(wrap) ?  s : (PetscMin(s,n-j-1));
1134 	for (k=zs; k<zs+nz; k++) {
1135 	  kstart = DAZPeriodic(wrap) ? -s : (PetscMax(-s,-k));
1136 	  kend   = DAZPeriodic(wrap) ?  s : (PetscMin(s,p-k-1));
1137 
1138 	  slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1139 
1140 	  cnt  = 0;
1141 	  for (l=0; l<nc; l++) {
1142 	    for (ii=istart; ii<iend+1; ii++) {
1143 	      for (jj=jstart; jj<jend+1; jj++) {
1144 		for (kk=kstart; kk<kend+1; kk++) {
1145 		  if ((st == DA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1146 		    cols[cnt++]  = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1147 		  }
1148 		}
1149 	      }
1150 	    }
1151 	    rows[l]      = l + nc*(slot);
1152 	  }
1153 	  ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1154 	}
1155       }
1156     }
1157     ierr = PetscFree(values);CHKERRQ(ierr);
1158     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1159     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1160   }
1161   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1162   PetscFunctionReturn(0);
1163 }
1164 
1165 /* ---------------------------------------------------------------------------------*/
1166 
1167 #undef __FUNCT__
1168 #define __FUNCT__ "DAGetMatrix1d_MPIAIJ"
1169 PetscErrorCode DAGetMatrix1d_MPIAIJ(DM da,Mat J)
1170 {
1171   PetscErrorCode         ierr;
1172   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1173   PetscInt               m,dim,s,*cols = PETSC_NULL,nc,*rows = PETSC_NULL,col,cnt,l;
1174   PetscInt               istart,iend;
1175   PetscScalar            *values;
1176   DAPeriodicType         wrap;
1177   ISLocalToGlobalMapping ltog,ltogb;
1178   DM_DA                  *dd = (DM_DA*)da->data;
1179 
1180   PetscFunctionBegin;
1181   /*
1182          nc - number of components per grid point
1183          col - number of colors needed in one direction for single component problem
1184 
1185   */
1186   ierr = DAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&wrap,0);CHKERRQ(ierr);
1187   col    = 2*s + 1;
1188 
1189   ierr = DAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
1190   ierr = DAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
1191 
1192   ierr = MatSeqAIJSetPreallocation(J,col*nc,0);CHKERRQ(ierr);
1193   ierr = MatMPIAIJSetPreallocation(J,col*nc,0,col*nc,0);CHKERRQ(ierr);
1194   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1195   ierr = PetscMalloc2(nc,PetscInt,&rows,col*nc*nc,PetscInt,&cols);CHKERRQ(ierr);
1196 
1197   ierr = DAGetISLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1198   ierr = DAGetISLocalToGlobalMappingBlck(da,&ltogb);CHKERRQ(ierr);
1199   ierr = MatSetLocalToGlobalMapping(J,ltog);CHKERRQ(ierr);
1200   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb);CHKERRQ(ierr);
1201 
1202   /*
1203     For each node in the grid: we get the neighbors in the local (on processor ordering
1204     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1205     PETSc ordering.
1206   */
1207   if (!dd->prealloc_only) {
1208     ierr = PetscMalloc(col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1209     ierr = PetscMemzero(values,col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1210     for (i=xs; i<xs+nx; i++) {
1211       istart = PetscMax(-s,gxs - i);
1212       iend   = PetscMin(s,gxs + gnx - i - 1);
1213       slot   = i - gxs;
1214 
1215       cnt  = 0;
1216       for (l=0; l<nc; l++) {
1217 	for (i1=istart; i1<iend+1; i1++) {
1218 	  cols[cnt++] = l + nc*(slot + i1);
1219 	}
1220 	rows[l]      = l + nc*(slot);
1221       }
1222       ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1223     }
1224     ierr = PetscFree(values);CHKERRQ(ierr);
1225     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1226     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1227   }
1228   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1229   PetscFunctionReturn(0);
1230 }
1231 
1232 #undef __FUNCT__
1233 #define __FUNCT__ "DAGetMatrix2d_MPIBAIJ"
1234 PetscErrorCode DAGetMatrix2d_MPIBAIJ(DM da,Mat J)
1235 {
1236   PetscErrorCode         ierr;
1237   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1238   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1239   PetscInt               istart,iend,jstart,jend,ii,jj;
1240   MPI_Comm               comm;
1241   PetscScalar            *values;
1242   DAPeriodicType         wrap;
1243   DAStencilType          st;
1244   ISLocalToGlobalMapping ltog,ltogb;
1245   DM_DA                  *dd = (DM_DA*)da->data;
1246 
1247   PetscFunctionBegin;
1248   /*
1249      nc - number of components per grid point
1250      col - number of colors needed in one direction for single component problem
1251   */
1252   ierr = DAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&wrap,&st);CHKERRQ(ierr);
1253   col = 2*s + 1;
1254 
1255   ierr = DAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1256   ierr = DAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1257   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1258 
1259   ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1260 
1261   ierr = DAGetISLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1262   ierr = DAGetISLocalToGlobalMappingBlck(da,&ltogb);CHKERRQ(ierr);
1263 
1264   /* determine the matrix preallocation information */
1265   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1266   for (i=xs; i<xs+nx; i++) {
1267     istart = DAXPeriodic(wrap) ? -s : (PetscMax(-s,-i));
1268     iend   = DAXPeriodic(wrap) ?  s : (PetscMin(s,m-i-1));
1269     for (j=ys; j<ys+ny; j++) {
1270       jstart = DAYPeriodic(wrap) ? -s : (PetscMax(-s,-j));
1271       jend   = DAYPeriodic(wrap) ?  s : (PetscMin(s,n-j-1));
1272       slot = i - gxs + gnx*(j - gys);
1273 
1274       /* Find block columns in block row */
1275       cnt  = 0;
1276       for (ii=istart; ii<iend+1; ii++) {
1277         for (jj=jstart; jj<jend+1; jj++) {
1278           if (st == DA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1279             cols[cnt++]  = slot + ii + gnx*jj;
1280           }
1281         }
1282       }
1283       ierr = MatPreallocateSetLocal(ltogb,1,&slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
1284     }
1285   }
1286   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1287   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1288   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1289 
1290   ierr = MatSetLocalToGlobalMapping(J,ltog);CHKERRQ(ierr);
1291   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb);CHKERRQ(ierr);
1292 
1293   /*
1294     For each node in the grid: we get the neighbors in the local (on processor ordering
1295     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1296     PETSc ordering.
1297   */
1298   if (!dd->prealloc_only) {
1299     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1300     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1301     for (i=xs; i<xs+nx; i++) {
1302       istart = DAXPeriodic(wrap) ? -s : (PetscMax(-s,-i));
1303       iend   = DAXPeriodic(wrap) ?  s : (PetscMin(s,m-i-1));
1304       for (j=ys; j<ys+ny; j++) {
1305 	jstart = DAYPeriodic(wrap) ? -s : (PetscMax(-s,-j));
1306 	jend   = DAYPeriodic(wrap) ?  s : (PetscMin(s,n-j-1));
1307 	slot = i - gxs + gnx*(j - gys);
1308 	cnt  = 0;
1309         for (ii=istart; ii<iend+1; ii++) {
1310           for (jj=jstart; jj<jend+1; jj++) {
1311             if (st == DA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1312               cols[cnt++]  = slot + ii + gnx*jj;
1313             }
1314           }
1315         }
1316 	ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1317       }
1318     }
1319     ierr = PetscFree(values);CHKERRQ(ierr);
1320     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1321     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1322   }
1323   ierr = PetscFree(cols);CHKERRQ(ierr);
1324   PetscFunctionReturn(0);
1325 }
1326 
1327 #undef __FUNCT__
1328 #define __FUNCT__ "DAGetMatrix3d_MPIBAIJ"
1329 PetscErrorCode DAGetMatrix3d_MPIBAIJ(DM da,Mat J)
1330 {
1331   PetscErrorCode         ierr;
1332   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1333   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1334   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1335   MPI_Comm               comm;
1336   PetscScalar            *values;
1337   DAPeriodicType         wrap;
1338   DAStencilType          st;
1339   ISLocalToGlobalMapping ltog,ltogb;
1340   DM_DA                  *dd = (DM_DA*)da->data;
1341 
1342   PetscFunctionBegin;
1343   /*
1344          nc - number of components per grid point
1345          col - number of colors needed in one direction for single component problem
1346 
1347   */
1348   ierr = DAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&wrap,&st);CHKERRQ(ierr);
1349   col    = 2*s + 1;
1350 
1351   ierr = DAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1352   ierr = DAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1353   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1354 
1355   ierr  = PetscMalloc(col*col*col*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1356 
1357   ierr = DAGetISLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1358   ierr = DAGetISLocalToGlobalMappingBlck(da,&ltogb);CHKERRQ(ierr);
1359 
1360   /* determine the matrix preallocation information */
1361   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1362   for (i=xs; i<xs+nx; i++) {
1363     istart = DAXPeriodic(wrap) ? -s : (PetscMax(-s,-i));
1364     iend   = DAXPeriodic(wrap) ?  s : (PetscMin(s,m-i-1));
1365     for (j=ys; j<ys+ny; j++) {
1366       jstart = DAYPeriodic(wrap) ? -s : (PetscMax(-s,-j));
1367       jend   = DAYPeriodic(wrap) ?  s : (PetscMin(s,n-j-1));
1368       for (k=zs; k<zs+nz; k++) {
1369 	kstart = DAZPeriodic(wrap) ? -s : (PetscMax(-s,-k));
1370 	kend   = DAZPeriodic(wrap) ?  s : (PetscMin(s,p-k-1));
1371 
1372 	slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1373 
1374 	/* Find block columns in block row */
1375 	cnt  = 0;
1376         for (ii=istart; ii<iend+1; ii++) {
1377           for (jj=jstart; jj<jend+1; jj++) {
1378             for (kk=kstart; kk<kend+1; kk++) {
1379               if ((st == DA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1380 		cols[cnt++]  = slot + ii + gnx*jj + gnx*gny*kk;
1381 	      }
1382 	    }
1383 	  }
1384 	}
1385 	ierr = MatPreallocateSetLocal(ltogb,1,&slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
1386       }
1387     }
1388   }
1389   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1390   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1391   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1392 
1393   ierr = MatSetLocalToGlobalMapping(J,ltog);CHKERRQ(ierr);
1394   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb);CHKERRQ(ierr);
1395 
1396   /*
1397     For each node in the grid: we get the neighbors in the local (on processor ordering
1398     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1399     PETSc ordering.
1400   */
1401   if (!dd->prealloc_only) {
1402     ierr  = PetscMalloc(col*col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1403     ierr  = PetscMemzero(values,col*col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1404     for (i=xs; i<xs+nx; i++) {
1405       istart = DAXPeriodic(wrap) ? -s : (PetscMax(-s,-i));
1406       iend   = DAXPeriodic(wrap) ?  s : (PetscMin(s,m-i-1));
1407       for (j=ys; j<ys+ny; j++) {
1408 	jstart = DAYPeriodic(wrap) ? -s : (PetscMax(-s,-j));
1409 	jend   = DAYPeriodic(wrap) ?  s : (PetscMin(s,n-j-1));
1410 	for (k=zs; k<zs+nz; k++) {
1411 	  kstart = DAZPeriodic(wrap) ? -s : (PetscMax(-s,-k));
1412 	  kend   = DAZPeriodic(wrap) ?  s : (PetscMin(s,p-k-1));
1413 
1414 	  slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1415 
1416 	  cnt  = 0;
1417           for (ii=istart; ii<iend+1; ii++) {
1418             for (jj=jstart; jj<jend+1; jj++) {
1419               for (kk=kstart; kk<kend+1; kk++) {
1420                 if ((st == DA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1421                   cols[cnt++]  = slot + ii + gnx*jj + gnx*gny*kk;
1422                 }
1423               }
1424             }
1425           }
1426 	  ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1427 	}
1428       }
1429     }
1430     ierr = PetscFree(values);CHKERRQ(ierr);
1431     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1432     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1433   }
1434   ierr = PetscFree(cols);CHKERRQ(ierr);
1435   PetscFunctionReturn(0);
1436 }
1437 
1438 #undef __FUNCT__
1439 #define __FUNCT__ "L2GFilterUpperTriangular"
1440 /*
1441   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
1442   identify in the local ordering with periodic domain.
1443 */
1444 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[])
1445 {
1446   PetscErrorCode ierr;
1447   PetscInt i,n;
1448 
1449   PetscFunctionBegin;
1450   ierr = ISLocalToGlobalMappingApply(ltog,1,row,row);CHKERRQ(ierr);
1451   ierr = ISLocalToGlobalMappingApply(ltog,*cnt,col,col);CHKERRQ(ierr);
1452   for (i=0,n=0; i<*cnt; i++) {
1453     if (col[i] >= *row) col[n++] = col[i];
1454   }
1455   *cnt = n;
1456   PetscFunctionReturn(0);
1457 }
1458 
1459 #undef __FUNCT__
1460 #define __FUNCT__ "DAGetMatrix2d_MPISBAIJ"
1461 PetscErrorCode DAGetMatrix2d_MPISBAIJ(DM da,Mat J)
1462 {
1463   PetscErrorCode         ierr;
1464   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1465   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1466   PetscInt               istart,iend,jstart,jend,ii,jj;
1467   MPI_Comm               comm;
1468   PetscScalar            *values;
1469   DAPeriodicType         wrap;
1470   DAStencilType          st;
1471   ISLocalToGlobalMapping ltog,ltogb;
1472   DM_DA                  *dd = (DM_DA*)da->data;
1473 
1474   PetscFunctionBegin;
1475   /*
1476      nc - number of components per grid point
1477      col - number of colors needed in one direction for single component problem
1478   */
1479   ierr = DAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&wrap,&st);CHKERRQ(ierr);
1480   col = 2*s + 1;
1481 
1482   ierr = DAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1483   ierr = DAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1484   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1485 
1486   ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1487 
1488   ierr = DAGetISLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1489   ierr = DAGetISLocalToGlobalMappingBlck(da,&ltogb);CHKERRQ(ierr);
1490 
1491   /* determine the matrix preallocation information */
1492   ierr = MatPreallocateSymmetricInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1493   for (i=xs; i<xs+nx; i++) {
1494     istart = DAXPeriodic(wrap) ? -s : (PetscMax(-s,-i));
1495     iend   = DAXPeriodic(wrap) ?  s : (PetscMin(s,m-i-1));
1496     for (j=ys; j<ys+ny; j++) {
1497       jstart = DAYPeriodic(wrap) ? -s : (PetscMax(-s,-j));
1498       jend   = DAYPeriodic(wrap) ?  s : (PetscMin(s,n-j-1));
1499       slot = i - gxs + gnx*(j - gys);
1500 
1501       /* Find block columns in block row */
1502       cnt  = 0;
1503       for (ii=istart; ii<iend+1; ii++) {
1504         for (jj=jstart; jj<jend+1; jj++) {
1505           if (st == DA_STENCIL_BOX || !ii || !jj) {
1506             cols[cnt++]  = slot + ii + gnx*jj;
1507           }
1508         }
1509       }
1510       ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1511       ierr = MatPreallocateSymmetricSet(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
1512     }
1513   }
1514   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1515   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1516   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1517 
1518   ierr = MatSetLocalToGlobalMapping(J,ltog);CHKERRQ(ierr);
1519   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb);CHKERRQ(ierr);
1520 
1521   /*
1522     For each node in the grid: we get the neighbors in the local (on processor ordering
1523     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1524     PETSc ordering.
1525   */
1526   if (!dd->prealloc_only) {
1527     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1528     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1529     for (i=xs; i<xs+nx; i++) {
1530       istart = DAXPeriodic(wrap) ? -s : (PetscMax(-s,-i));
1531       iend   = DAXPeriodic(wrap) ?  s : (PetscMin(s,m-i-1));
1532       for (j=ys; j<ys+ny; j++) {
1533         jstart = DAYPeriodic(wrap) ? -s : (PetscMax(-s,-j));
1534         jend   = DAYPeriodic(wrap) ?  s : (PetscMin(s,n-j-1));
1535         slot = i - gxs + gnx*(j - gys);
1536 
1537         /* Find block columns in block row */
1538         cnt  = 0;
1539         for (ii=istart; ii<iend+1; ii++) {
1540           for (jj=jstart; jj<jend+1; jj++) {
1541             if (st == DA_STENCIL_BOX || !ii || !jj) {
1542               cols[cnt++]  = slot + ii + gnx*jj;
1543             }
1544           }
1545         }
1546         ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1547 	ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1548       }
1549     }
1550     ierr = PetscFree(values);CHKERRQ(ierr);
1551     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1552     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1553   }
1554   ierr = PetscFree(cols);CHKERRQ(ierr);
1555   PetscFunctionReturn(0);
1556 }
1557 
1558 #undef __FUNCT__
1559 #define __FUNCT__ "DAGetMatrix3d_MPISBAIJ"
1560 PetscErrorCode DAGetMatrix3d_MPISBAIJ(DM da,Mat J)
1561 {
1562   PetscErrorCode         ierr;
1563   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1564   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1565   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1566   MPI_Comm               comm;
1567   PetscScalar            *values;
1568   DAPeriodicType         wrap;
1569   DAStencilType          st;
1570   ISLocalToGlobalMapping ltog,ltogb;
1571   DM_DA                  *dd = (DM_DA*)da->data;
1572 
1573   PetscFunctionBegin;
1574   /*
1575      nc - number of components per grid point
1576      col - number of colors needed in one direction for single component problem
1577   */
1578   ierr = DAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&wrap,&st);CHKERRQ(ierr);
1579   col = 2*s + 1;
1580 
1581   ierr = DAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1582   ierr = DAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1583   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1584 
1585   /* create the matrix */
1586   ierr = PetscMalloc(col*col*col*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1587 
1588   ierr = DAGetISLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1589   ierr = DAGetISLocalToGlobalMappingBlck(da,&ltogb);CHKERRQ(ierr);
1590 
1591   /* determine the matrix preallocation information */
1592   ierr = MatPreallocateSymmetricInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1593   for (i=xs; i<xs+nx; i++) {
1594     istart = DAXPeriodic(wrap) ? -s : (PetscMax(-s,-i));
1595     iend   = DAXPeriodic(wrap) ?  s : (PetscMin(s,m-i-1));
1596     for (j=ys; j<ys+ny; j++) {
1597       jstart = DAYPeriodic(wrap) ? -s : (PetscMax(-s,-j));
1598       jend   = DAYPeriodic(wrap) ?  s : (PetscMin(s,n-j-1));
1599       for (k=zs; k<zs+nz; k++) {
1600         kstart = DAZPeriodic(wrap) ? -s : (PetscMax(-s,-k));
1601 	kend   = DAZPeriodic(wrap) ?  s : (PetscMin(s,p-k-1));
1602 
1603 	slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1604 
1605 	/* Find block columns in block row */
1606 	cnt  = 0;
1607         for (ii=istart; ii<iend+1; ii++) {
1608           for (jj=jstart; jj<jend+1; jj++) {
1609             for (kk=kstart; kk<kend+1; kk++) {
1610               if ((st == DA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
1611                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1612               }
1613             }
1614           }
1615         }
1616         ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1617         ierr = MatPreallocateSymmetricSet(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
1618       }
1619     }
1620   }
1621   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1622   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1623   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1624 
1625   ierr = MatSetLocalToGlobalMapping(J,ltog);CHKERRQ(ierr);
1626   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb);CHKERRQ(ierr);
1627 
1628   /*
1629     For each node in the grid: we get the neighbors in the local (on processor ordering
1630     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1631     PETSc ordering.
1632   */
1633   if (!dd->prealloc_only) {
1634     ierr = PetscMalloc(col*col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1635     ierr = PetscMemzero(values,col*col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1636     for (i=xs; i<xs+nx; i++) {
1637       istart = DAXPeriodic(wrap) ? -s : (PetscMax(-s,-i));
1638       iend   = DAXPeriodic(wrap) ?  s : (PetscMin(s,m-i-1));
1639       for (j=ys; j<ys+ny; j++) {
1640         jstart = DAYPeriodic(wrap) ? -s : (PetscMax(-s,-j));
1641 	jend   = DAYPeriodic(wrap) ?  s : (PetscMin(s,n-j-1));
1642 	for (k=zs; k<zs+nz; k++) {
1643           kstart = DAZPeriodic(wrap) ? -s : (PetscMax(-s,-k));
1644 	  kend   = DAZPeriodic(wrap) ?  s : (PetscMin(s,p-k-1));
1645 
1646 	  slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1647 
1648 	  cnt  = 0;
1649           for (ii=istart; ii<iend+1; ii++) {
1650             for (jj=jstart; jj<jend+1; jj++) {
1651               for (kk=kstart; kk<kend+1; kk++) {
1652                 if ((st == DA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
1653 		  cols[cnt++]  = slot + ii + gnx*jj + gnx*gny*kk;
1654 		}
1655 	      }
1656 	    }
1657 	  }
1658           ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1659           ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1660 	}
1661       }
1662     }
1663     ierr = PetscFree(values);CHKERRQ(ierr);
1664     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1665     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1666   }
1667   ierr = PetscFree(cols);CHKERRQ(ierr);
1668   PetscFunctionReturn(0);
1669 }
1670 
1671 /* ---------------------------------------------------------------------------------*/
1672 
1673 #undef __FUNCT__
1674 #define __FUNCT__ "DAGetMatrix3d_MPIAIJ_Fill"
1675 PetscErrorCode DAGetMatrix3d_MPIAIJ_Fill(DM da,Mat J)
1676 {
1677   PetscErrorCode         ierr;
1678   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1679   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,l,p,*dnz,*onz;
1680   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1681   DM_DA                  *dd = (DM_DA*)da->data;
1682   PetscInt               ifill_col,*dfill = dd->dfill,*ofill = dd->ofill;
1683   MPI_Comm               comm;
1684   PetscScalar            *values;
1685   DAPeriodicType         wrap;
1686   ISLocalToGlobalMapping ltog,ltogb;
1687   DAStencilType          st;
1688 
1689   PetscFunctionBegin;
1690   /*
1691          nc - number of components per grid point
1692          col - number of colors needed in one direction for single component problem
1693 
1694   */
1695   ierr = DAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&wrap,&st);CHKERRQ(ierr);
1696   col    = 2*s + 1;
1697   if (DAXPeriodic(wrap) && (m % col)){
1698     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
1699                  by 2*stencil_width + 1\n");
1700   }
1701   if (DAYPeriodic(wrap) && (n % col)){
1702     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
1703                  by 2*stencil_width + 1\n");
1704   }
1705   if (DAZPeriodic(wrap) && (p % col)){
1706     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
1707                  by 2*stencil_width + 1\n");
1708   }
1709 
1710   ierr = DAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1711   ierr = DAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1712   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1713 
1714   ierr = PetscMalloc(col*col*col*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1715   ierr = DAGetISLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1716   ierr = DAGetISLocalToGlobalMappingBlck(da,&ltogb);CHKERRQ(ierr);
1717 
1718   /* determine the matrix preallocation information */
1719   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1720 
1721 
1722   for (i=xs; i<xs+nx; i++) {
1723     istart = DAXPeriodic(wrap) ? -s : (PetscMax(-s,-i));
1724     iend   = DAXPeriodic(wrap) ?  s : (PetscMin(s,m-i-1));
1725     for (j=ys; j<ys+ny; j++) {
1726       jstart = DAYPeriodic(wrap) ? -s : (PetscMax(-s,-j));
1727       jend   = DAYPeriodic(wrap) ?  s : (PetscMin(s,n-j-1));
1728       for (k=zs; k<zs+nz; k++) {
1729         kstart = DAZPeriodic(wrap) ? -s : (PetscMax(-s,-k));
1730         kend   = DAZPeriodic(wrap) ?  s : (PetscMin(s,p-k-1));
1731 
1732         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1733 
1734 	for (l=0; l<nc; l++) {
1735 	  cnt  = 0;
1736 	  for (ii=istart; ii<iend+1; ii++) {
1737 	    for (jj=jstart; jj<jend+1; jj++) {
1738 	      for (kk=kstart; kk<kend+1; kk++) {
1739 		if (ii || jj || kk) {
1740 		  if ((st == DA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1741 		    for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++)
1742 		      cols[cnt++]  = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1743 		  }
1744 		} else {
1745 		  if (dfill) {
1746 		    for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++)
1747 		      cols[cnt++]  = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1748 		  } else {
1749 		    for (ifill_col=0; ifill_col<nc; ifill_col++)
1750 		      cols[cnt++]  = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1751 		  }
1752 		}
1753 	      }
1754 	    }
1755 	  }
1756 	  row  = l + nc*(slot);
1757 	  ierr = MatPreallocateSetLocal(ltog,1,&row,cnt,cols,dnz,onz);CHKERRQ(ierr);
1758 	}
1759       }
1760     }
1761   }
1762   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1763   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1764   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1765   ierr = MatSetLocalToGlobalMapping(J,ltog);CHKERRQ(ierr);
1766   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb);CHKERRQ(ierr);
1767 
1768   /*
1769     For each node in the grid: we get the neighbors in the local (on processor ordering
1770     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1771     PETSc ordering.
1772   */
1773   if (!dd->prealloc_only) {
1774     ierr = PetscMalloc(col*col*col*nc*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1775     ierr = PetscMemzero(values,col*col*col*nc*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1776     for (i=xs; i<xs+nx; i++) {
1777       istart = DAXPeriodic(wrap) ? -s : (PetscMax(-s,-i));
1778       iend   = DAXPeriodic(wrap) ?  s : (PetscMin(s,m-i-1));
1779       for (j=ys; j<ys+ny; j++) {
1780 	jstart = DAYPeriodic(wrap) ? -s : (PetscMax(-s,-j));
1781 	jend   = DAYPeriodic(wrap) ?  s : (PetscMin(s,n-j-1));
1782 	for (k=zs; k<zs+nz; k++) {
1783 	  kstart = DAZPeriodic(wrap) ? -s : (PetscMax(-s,-k));
1784 	  kend   = DAZPeriodic(wrap) ?  s : (PetscMin(s,p-k-1));
1785 
1786 	  slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1787 
1788 	  for (l=0; l<nc; l++) {
1789 	    cnt  = 0;
1790 	    for (ii=istart; ii<iend+1; ii++) {
1791 	      for (jj=jstart; jj<jend+1; jj++) {
1792 		for (kk=kstart; kk<kend+1; kk++) {
1793 		  if (ii || jj || kk) {
1794 		    if ((st == DA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1795 		      for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++)
1796 			cols[cnt++]  = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1797 		    }
1798 		  } else {
1799 		    if (dfill) {
1800 		      for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++)
1801 			cols[cnt++]  = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1802 		    } else {
1803 		      for (ifill_col=0; ifill_col<nc; ifill_col++)
1804 			cols[cnt++]  = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1805 		    }
1806 		  }
1807 		}
1808 	      }
1809 	    }
1810 	    row  = l + nc*(slot);
1811 	    ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1812 	  }
1813 	}
1814       }
1815     }
1816     ierr = PetscFree(values);CHKERRQ(ierr);
1817     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1818     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1819   }
1820   ierr = PetscFree(cols);CHKERRQ(ierr);
1821   PetscFunctionReturn(0);
1822 }
1823