xref: /petsc/src/dm/impls/da/fdda.c (revision 7d0a6c19129e7069c8a40e210b34ed62989173db)
1 
2 #include "private/daimpl.h" /*I      "petscdm.h"     I*/
3 #include "petscmat.h"         /*I      "petscmat.h"    I*/
4 #include "private/matimpl.h"
5 
6 extern PetscErrorCode DMGetColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring *);
7 extern PetscErrorCode DMGetColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring *);
8 extern PetscErrorCode DMGetColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring *);
9 extern PetscErrorCode DMGetColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring *);
10 
11 /*
12    For ghost i that may be negative or greater than the upper bound this
13   maps it into the 0:m-1 range using periodicity
14 */
15 #define SetInRange(i,m) ((i < 0) ? m+i:((i >= m) ? i-m:i))
16 
17 #undef __FUNCT__
18 #define __FUNCT__ "DMDASetBlockFills_Private"
19 static PetscErrorCode DMDASetBlockFills_Private(PetscInt *dfill,PetscInt w,PetscInt **rfill)
20 {
21   PetscErrorCode ierr;
22   PetscInt       i,j,nz,*fill;
23 
24   PetscFunctionBegin;
25   if (!dfill) PetscFunctionReturn(0);
26 
27   /* count number nonzeros */
28   nz = 0;
29   for (i=0; i<w; i++) {
30     for (j=0; j<w; j++) {
31       if (dfill[w*i+j]) nz++;
32     }
33   }
34   ierr = PetscMalloc((nz + w + 1)*sizeof(PetscInt),&fill);CHKERRQ(ierr);
35   /* construct modified CSR storage of nonzero structure */
36   nz = w + 1;
37   for (i=0; i<w; i++) {
38     fill[i] = nz;
39     for (j=0; j<w; j++) {
40       if (dfill[w*i+j]) {
41 	fill[nz] = j;
42 	nz++;
43       }
44     }
45   }
46   fill[w] = nz;
47 
48   *rfill = fill;
49   PetscFunctionReturn(0);
50 }
51 
52 #undef __FUNCT__
53 #define __FUNCT__ "DMDASetMatPreallocateOnly"
54 /*@
55     DMDASetMatPreallocateOnly - When DMGetMatrix() is called the matrix will be properly
56        preallocated but the nonzero structure and zero values will not be set.
57 
58     Logically Collective on DMDA
59 
60     Input Parameter:
61 +   da - the distributed array
62 -   only - PETSC_TRUE if only want preallocation
63 
64 
65     Level: developer
66 
67 .seealso DMGetMatrix(), DMDASetGetMatrix(), DMDASetBlockSize(), DMDASetBlockFills()
68 
69 @*/
70 PetscErrorCode  DMDASetMatPreallocateOnly(DM da,PetscBool  only)
71 {
72   DM_DA *dd = (DM_DA*)da->data;
73 
74   PetscFunctionBegin;
75   dd->prealloc_only = only;
76   PetscFunctionReturn(0);
77 }
78 
79 #undef __FUNCT__
80 #define __FUNCT__ "DMDASetBlockFills"
81 /*@
82     DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
83     of the matrix returned by DMGetMatrix().
84 
85     Logically Collective on DMDA
86 
87     Input Parameter:
88 +   da - the distributed array
89 .   dfill - the fill pattern in the diagonal block (may be PETSC_NULL, means use dense block)
90 -   ofill - the fill pattern in the off-diagonal blocks
91 
92 
93     Level: developer
94 
95     Notes: This only makes sense when you are doing multicomponent problems but using the
96        MPIAIJ matrix format
97 
98            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
99        representing coupling and 0 entries for missing coupling. For example
100 $             dfill[9] = {1, 0, 0,
101 $                         1, 1, 0,
102 $                         0, 1, 1}
103        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
104        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
105        diagonal block).
106 
107      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
108      can be represented in the dfill, ofill format
109 
110    Contributed by Glenn Hammond
111 
112 .seealso DMGetMatrix(), DMDASetGetMatrix(), DMDASetMatPreallocateOnly()
113 
114 @*/
115 PetscErrorCode  DMDASetBlockFills(DM da,PetscInt *dfill,PetscInt *ofill)
116 {
117   DM_DA          *dd = (DM_DA*)da->data;
118   PetscErrorCode ierr;
119 
120   PetscFunctionBegin;
121   ierr = DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);CHKERRQ(ierr);
122   ierr = DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);CHKERRQ(ierr);
123   PetscFunctionReturn(0);
124 }
125 
126 
127 #undef __FUNCT__
128 #define __FUNCT__ "DMGetColoring_DA"
129 PetscErrorCode  DMGetColoring_DA(DM da,ISColoringType ctype,const MatType mtype,ISColoring *coloring)
130 {
131   PetscErrorCode   ierr;
132   PetscInt         dim,m,n,p,nc;
133   DMDABoundaryType bx,by,bz;
134   MPI_Comm         comm;
135   PetscMPIInt      size;
136   PetscBool        isBAIJ;
137   DM_DA            *dd = (DM_DA*)da->data;
138 
139   PetscFunctionBegin;
140   /*
141                                   m
142           ------------------------------------------------------
143          |                                                     |
144          |                                                     |
145          |               ----------------------                |
146          |               |                    |                |
147       n  |           yn  |                    |                |
148          |               |                    |                |
149          |               .---------------------                |
150          |             (xs,ys)     xn                          |
151          |            .                                        |
152          |         (gxs,gys)                                   |
153          |                                                     |
154           -----------------------------------------------------
155   */
156 
157   /*
158          nc - number of components per grid point
159          col - number of colors needed in one direction for single component problem
160 
161   */
162   ierr = DMDAGetInfo(da,&dim,0,0,0,&m,&n,&p,&nc,0,&bx,&by,&bz,0);CHKERRQ(ierr);
163 
164   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
165   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
166   if (ctype == IS_COLORING_GHOSTED){
167     if (size == 1) {
168       ctype = IS_COLORING_GLOBAL;
169     } else if (dim > 1){
170       if ((m==1 && bx == DMDA_BOUNDARY_PERIODIC) || (n==1 && by == DMDA_BOUNDARY_PERIODIC) || (p==1 && bz == DMDA_BOUNDARY_PERIODIC)){
171         SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"IS_COLORING_GHOSTED cannot be used for periodic boundary condition having both ends of the domain  on the same process");
172       }
173     }
174   }
175 
176   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
177      matrices is for the blocks, not the individual matrix elements  */
178   ierr = PetscStrcmp(mtype,MATBAIJ,&isBAIJ);CHKERRQ(ierr);
179   if (!isBAIJ) {ierr = PetscStrcmp(mtype,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);}
180   if (!isBAIJ) {ierr = PetscStrcmp(mtype,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);}
181   if (isBAIJ) {
182     dd->w = 1;
183     dd->xs = dd->xs/nc;
184     dd->xe = dd->xe/nc;
185     dd->Xs = dd->Xs/nc;
186     dd->Xe = dd->Xe/nc;
187   }
188 
189   /*
190      We do not provide a getcoloring function in the DMDA operations because
191    the basic DMDA does not know about matrices. We think of DMDA as being more
192    more low-level then matrices.
193   */
194   if (dim == 1) {
195     ierr = DMGetColoring_DA_1d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
196   } else if (dim == 2) {
197     ierr =  DMGetColoring_DA_2d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
198   } else if (dim == 3) {
199     ierr =  DMGetColoring_DA_3d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
200   } else {
201     SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
202   }
203   if (isBAIJ) {
204     dd->w = nc;
205     dd->xs = dd->xs*nc;
206     dd->xe = dd->xe*nc;
207     dd->Xs = dd->Xs*nc;
208     dd->Xe = dd->Xe*nc;
209   }
210   PetscFunctionReturn(0);
211 }
212 
213 /* ---------------------------------------------------------------------------------*/
214 
215 #undef __FUNCT__
216 #define __FUNCT__ "DMGetColoring_DA_2d_MPIAIJ"
217 PetscErrorCode DMGetColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
218 {
219   PetscErrorCode         ierr;
220   PetscInt               xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
221   PetscInt               ncolors;
222   MPI_Comm               comm;
223   DMDABoundaryType       bx,by;
224   DMDAStencilType        st;
225   ISColoringValue        *colors;
226   DM_DA                  *dd = (DM_DA*)da->data;
227 
228   PetscFunctionBegin;
229   /*
230          nc - number of components per grid point
231          col - number of colors needed in one direction for single component problem
232 
233   */
234   ierr = DMDAGetInfo(da,&dim,&m,&n,0,&M,&N,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
235   col    = 2*s + 1;
236   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
237   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
238   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
239 
240   /* special case as taught to us by Paul Hovland */
241   if (st == DMDA_STENCIL_STAR && s == 1) {
242     ierr = DMGetColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
243   } else {
244 
245     if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)){
246       SETERRQ2(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X (%d) is divisible\n\
247                  by 2*stencil_width + 1 (%d)\n", m, col);
248     }
249     if (by == DMDA_BOUNDARY_PERIODIC && (n % col)){
250       SETERRQ2(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y (%d) is divisible\n\
251                  by 2*stencil_width + 1 (%d)\n", n, col);
252     }
253     if (ctype == IS_COLORING_GLOBAL) {
254       if (!dd->localcoloring) {
255 	ierr = PetscMalloc(nc*nx*ny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
256 	ii = 0;
257 	for (j=ys; j<ys+ny; j++) {
258 	  for (i=xs; i<xs+nx; i++) {
259 	    for (k=0; k<nc; k++) {
260 	      colors[ii++] = k + nc*((i % col) + col*(j % col));
261 	    }
262 	  }
263 	}
264         ncolors = nc + nc*(col-1 + col*(col-1));
265 	ierr = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,&dd->localcoloring);CHKERRQ(ierr);
266       }
267       *coloring = dd->localcoloring;
268     } else if (ctype == IS_COLORING_GHOSTED) {
269       if (!dd->ghostedcoloring) {
270 	ierr = PetscMalloc(nc*gnx*gny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
271 	ii = 0;
272 	for (j=gys; j<gys+gny; j++) {
273 	  for (i=gxs; i<gxs+gnx; i++) {
274 	    for (k=0; k<nc; k++) {
275 	      /* the complicated stuff is to handle periodic boundaries */
276 	      colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
277 	    }
278 	  }
279 	}
280         ncolors = nc + nc*(col - 1 + col*(col-1));
281 	ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
282         /* PetscIntView(ncolors,(PetscInt *)colors,0); */
283 
284 	ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
285       }
286       *coloring = dd->ghostedcoloring;
287     } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
288   }
289   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
290   PetscFunctionReturn(0);
291 }
292 
293 /* ---------------------------------------------------------------------------------*/
294 
295 #undef __FUNCT__
296 #define __FUNCT__ "DMGetColoring_DA_3d_MPIAIJ"
297 PetscErrorCode DMGetColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
298 {
299   PetscErrorCode    ierr;
300   PetscInt          xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
301   PetscInt          ncolors;
302   MPI_Comm          comm;
303   DMDABoundaryType  bx,by,bz;
304   DMDAStencilType   st;
305   ISColoringValue   *colors;
306   DM_DA             *dd = (DM_DA*)da->data;
307 
308   PetscFunctionBegin;
309   /*
310          nc - number of components per grid point
311          col - number of colors needed in one direction for single component problem
312 
313   */
314   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
315   col    = 2*s + 1;
316   if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)){
317     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
318                  by 2*stencil_width + 1\n");
319   }
320   if (by == DMDA_BOUNDARY_PERIODIC && (n % col)){
321     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
322                  by 2*stencil_width + 1\n");
323   }
324   if (bz == DMDA_BOUNDARY_PERIODIC && (p % col)){
325     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
326                  by 2*stencil_width + 1\n");
327   }
328 
329   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
330   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
331   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
332 
333   /* create the coloring */
334   if (ctype == IS_COLORING_GLOBAL) {
335     if (!dd->localcoloring) {
336       ierr = PetscMalloc(nc*nx*ny*nz*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
337       ii = 0;
338       for (k=zs; k<zs+nz; k++) {
339         for (j=ys; j<ys+ny; j++) {
340           for (i=xs; i<xs+nx; i++) {
341             for (l=0; l<nc; l++) {
342               colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
343             }
344           }
345         }
346       }
347       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
348       ierr = ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,&dd->localcoloring);CHKERRQ(ierr);
349     }
350     *coloring = dd->localcoloring;
351   } else if (ctype == IS_COLORING_GHOSTED) {
352     if (!dd->ghostedcoloring) {
353       ierr = PetscMalloc(nc*gnx*gny*gnz*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
354       ii = 0;
355       for (k=gzs; k<gzs+gnz; k++) {
356         for (j=gys; j<gys+gny; j++) {
357           for (i=gxs; i<gxs+gnx; i++) {
358             for (l=0; l<nc; l++) {
359               /* the complicated stuff is to handle periodic boundaries */
360               colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
361             }
362           }
363         }
364       }
365       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
366       ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
367       ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
368     }
369     *coloring = dd->ghostedcoloring;
370   } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
371   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
372   PetscFunctionReturn(0);
373 }
374 
375 /* ---------------------------------------------------------------------------------*/
376 
377 #undef __FUNCT__
378 #define __FUNCT__ "DMGetColoring_DA_1d_MPIAIJ"
379 PetscErrorCode DMGetColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
380 {
381   PetscErrorCode    ierr;
382   PetscInt          xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
383   PetscInt          ncolors;
384   MPI_Comm          comm;
385   DMDABoundaryType  bx;
386   ISColoringValue   *colors;
387   DM_DA             *dd = (DM_DA*)da->data;
388 
389   PetscFunctionBegin;
390   /*
391          nc - number of components per grid point
392          col - number of colors needed in one direction for single component problem
393 
394   */
395   ierr = DMDAGetInfo(da,&dim,&m,0,0,&M,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
396   col    = 2*s + 1;
397 
398   if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)) {
399     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points is divisible\n\
400                  by 2*stencil_width + 1\n");
401   }
402 
403   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
404   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
405   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
406 
407   /* create the coloring */
408   if (ctype == IS_COLORING_GLOBAL) {
409     if (!dd->localcoloring) {
410       ierr = PetscMalloc(nc*nx*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
411       i1 = 0;
412       for (i=xs; i<xs+nx; i++) {
413         for (l=0; l<nc; l++) {
414           colors[i1++] = l + nc*(i % col);
415         }
416       }
417       ncolors = nc + nc*(col-1);
418       ierr = ISColoringCreate(comm,ncolors,nc*nx,colors,&dd->localcoloring);CHKERRQ(ierr);
419     }
420     *coloring = dd->localcoloring;
421   } else if (ctype == IS_COLORING_GHOSTED) {
422     if (!dd->ghostedcoloring) {
423       ierr = PetscMalloc(nc*gnx*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
424       i1 = 0;
425       for (i=gxs; i<gxs+gnx; i++) {
426         for (l=0; l<nc; l++) {
427           /* the complicated stuff is to handle periodic boundaries */
428           colors[i1++] = l + nc*(SetInRange(i,m) % col);
429         }
430       }
431       ncolors = nc + nc*(col-1);
432       ierr = ISColoringCreate(comm,ncolors,nc*gnx,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
433       ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
434     }
435     *coloring = dd->ghostedcoloring;
436   } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
437   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
438   PetscFunctionReturn(0);
439 }
440 
441 #undef __FUNCT__
442 #define __FUNCT__ "DMGetColoring_DA_2d_5pt_MPIAIJ"
443 PetscErrorCode DMGetColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
444 {
445   PetscErrorCode    ierr;
446   PetscInt          xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
447   PetscInt          ncolors;
448   MPI_Comm          comm;
449   DMDABoundaryType  bx,by;
450   ISColoringValue   *colors;
451   DM_DA             *dd = (DM_DA*)da->data;
452 
453   PetscFunctionBegin;
454   /*
455          nc - number of components per grid point
456          col - number of colors needed in one direction for single component problem
457 
458   */
459   ierr   = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,0);CHKERRQ(ierr);
460   ierr   = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
461   ierr   = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
462   ierr   = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
463 
464   if (bx == DMDA_BOUNDARY_PERIODIC && (m % 5)){
465     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
466                  by 5\n");
467   }
468   if (by == DMDA_BOUNDARY_PERIODIC && (n % 5)){
469     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
470                  by 5\n");
471   }
472 
473   /* create the coloring */
474   if (ctype == IS_COLORING_GLOBAL) {
475     if (!dd->localcoloring) {
476       ierr = PetscMalloc(nc*nx*ny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
477       ii = 0;
478       for (j=ys; j<ys+ny; j++) {
479 	for (i=xs; i<xs+nx; i++) {
480 	  for (k=0; k<nc; k++) {
481 	    colors[ii++] = k + nc*((3*j+i) % 5);
482 	  }
483 	}
484       }
485       ncolors = 5*nc;
486       ierr = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,&dd->localcoloring);CHKERRQ(ierr);
487     }
488     *coloring = dd->localcoloring;
489   } else if (ctype == IS_COLORING_GHOSTED) {
490     if (!dd->ghostedcoloring) {
491       ierr = PetscMalloc(nc*gnx*gny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
492       ii = 0;
493       for (j=gys; j<gys+gny; j++) {
494 	for (i=gxs; i<gxs+gnx; i++) {
495 	  for (k=0; k<nc; k++) {
496 	    colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
497 	  }
498 	}
499       }
500       ncolors = 5*nc;
501       ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
502       ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
503     }
504     *coloring = dd->ghostedcoloring;
505   } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
506   PetscFunctionReturn(0);
507 }
508 
509 /* =========================================================================== */
510 extern PetscErrorCode DMGetMatrix_DA_1d_MPIAIJ(DM,Mat);
511 extern PetscErrorCode DMGetMatrix_DA_2d_MPIAIJ(DM,Mat);
512 extern PetscErrorCode DMGetMatrix_DA_2d_MPIAIJ_Fill(DM,Mat);
513 extern PetscErrorCode DMGetMatrix_DA_3d_MPIAIJ(DM,Mat);
514 extern PetscErrorCode DMGetMatrix_DA_3d_MPIAIJ_Fill(DM,Mat);
515 extern PetscErrorCode DMGetMatrix_DA_2d_MPIBAIJ(DM,Mat);
516 extern PetscErrorCode DMGetMatrix_DA_3d_MPIBAIJ(DM,Mat);
517 extern PetscErrorCode DMGetMatrix_DA_2d_MPISBAIJ(DM,Mat);
518 extern PetscErrorCode DMGetMatrix_DA_3d_MPISBAIJ(DM,Mat);
519 
520 #undef __FUNCT__
521 #define __FUNCT__ "MatSetDA"
522 /*@
523    MatSetDA - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix
524 
525    Logically Collective on Mat
526 
527    Input Parameters:
528 +  mat - the matrix
529 -  da - the da
530 
531    Level: intermediate
532 
533 @*/
534 PetscErrorCode  MatSetDA(Mat mat,DM da)
535 {
536   PetscErrorCode ierr;
537 
538   PetscFunctionBegin;
539   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
540   PetscValidHeaderSpecific(da,DM_CLASSID,1);
541   ierr = PetscTryMethod(mat,"MatSetDA_C",(Mat,DM),(mat,da));CHKERRQ(ierr);
542   PetscFunctionReturn(0);
543 }
544 
545 EXTERN_C_BEGIN
546 #undef __FUNCT__
547 #define __FUNCT__ "MatView_MPI_DA"
548 PetscErrorCode  MatView_MPI_DA(Mat A,PetscViewer viewer)
549 {
550   DM             da;
551   PetscErrorCode ierr;
552   const char     *prefix;
553   Mat            Anatural;
554   AO             ao;
555   PetscInt       rstart,rend,*petsc,i;
556   IS             is;
557   MPI_Comm       comm;
558 
559   PetscFunctionBegin;
560   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
561   ierr = PetscObjectQuery((PetscObject)A,"DMDA",(PetscObject*)&da);CHKERRQ(ierr);
562   if (!da) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
563 
564   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
565   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
566   ierr = PetscMalloc((rend-rstart)*sizeof(PetscInt),&petsc);CHKERRQ(ierr);
567   for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
568   ierr = AOApplicationToPetsc(ao,rend-rstart,petsc);CHKERRQ(ierr);
569   ierr = ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
570 
571   /* call viewer on natural ordering */
572   ierr = MatGetSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);CHKERRQ(ierr);
573   ierr = ISDestroy(is);CHKERRQ(ierr);
574   ierr = PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);CHKERRQ(ierr);
575   ierr = PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);CHKERRQ(ierr);
576   ierr = PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);CHKERRQ(ierr);
577   ierr = MatView(Anatural,viewer);CHKERRQ(ierr);
578   ierr = MatDestroy(Anatural);CHKERRQ(ierr);
579   PetscFunctionReturn(0);
580 }
581 EXTERN_C_END
582 
583 EXTERN_C_BEGIN
584 #undef __FUNCT__
585 #define __FUNCT__ "MatLoad_MPI_DA"
586 PetscErrorCode  MatLoad_MPI_DA(Mat A,PetscViewer viewer)
587 {
588   DM             da;
589   PetscErrorCode ierr;
590   Mat            Anatural,Aapp;
591   AO             ao;
592   PetscInt       rstart,rend,*app,i;
593   IS             is;
594   MPI_Comm       comm;
595 
596   PetscFunctionBegin;
597   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
598   ierr = PetscObjectQuery((PetscObject)A,"DMDA",(PetscObject*)&da);CHKERRQ(ierr);
599   if (!da) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
600 
601   /* Load the matrix in natural ordering */
602   ierr = MatCreate(((PetscObject)A)->comm,&Anatural);CHKERRQ(ierr);
603   ierr = MatSetType(Anatural,((PetscObject)A)->type_name);CHKERRQ(ierr);
604   ierr = MatSetSizes(Anatural,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
605   ierr = MatLoad(Anatural,viewer);CHKERRQ(ierr);
606 
607   /* Map natural ordering to application ordering and create IS */
608   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
609   ierr = MatGetOwnershipRange(Anatural,&rstart,&rend);CHKERRQ(ierr);
610   ierr = PetscMalloc((rend-rstart)*sizeof(PetscInt),&app);CHKERRQ(ierr);
611   for (i=rstart; i<rend; i++) app[i-rstart] = i;
612   ierr = AOPetscToApplication(ao,rend-rstart,app);CHKERRQ(ierr);
613   ierr = ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
614 
615   /* Do permutation and replace header */
616   ierr = MatGetSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);CHKERRQ(ierr);
617   ierr = MatHeaderReplace(A,Aapp);CHKERRQ(ierr);
618   ierr = ISDestroy(is);CHKERRQ(ierr);
619   ierr = MatDestroy(Anatural);CHKERRQ(ierr);
620   PetscFunctionReturn(0);
621 }
622 EXTERN_C_END
623 
624 #undef __FUNCT__
625 #define __FUNCT__ "DMGetMatrix_DA"
626 PetscErrorCode  DMGetMatrix_DA(DM da, const MatType mtype,Mat *J)
627 {
628   PetscErrorCode ierr;
629   PetscInt       dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
630   Mat            A;
631   MPI_Comm       comm;
632   const MatType  Atype;
633   void           (*aij)(void)=PETSC_NULL,(*baij)(void)=PETSC_NULL,(*sbaij)(void)=PETSC_NULL;
634   MatType        ttype[256];
635   PetscBool      flg;
636   PetscMPIInt    size;
637   DM_DA          *dd = (DM_DA*)da->data;
638 
639   PetscFunctionBegin;
640 #ifndef PETSC_USE_DYNAMIC_LIBRARIES
641   ierr = MatInitializePackage(PETSC_NULL);CHKERRQ(ierr);
642 #endif
643   if (!mtype) mtype = MATAIJ;
644   ierr = PetscStrcpy((char*)ttype,mtype);CHKERRQ(ierr);
645   ierr = PetscOptionsBegin(((PetscObject)da)->comm,((PetscObject)da)->prefix,"DMDA options","Mat");CHKERRQ(ierr);
646   ierr = PetscOptionsList("-da_mat_type","Matrix type","MatSetType",MatList,mtype,(char*)ttype,256,&flg);CHKERRQ(ierr);
647   ierr = PetscOptionsEnd();
648 
649   /*
650                                   m
651           ------------------------------------------------------
652          |                                                     |
653          |                                                     |
654          |               ----------------------                |
655          |               |                    |                |
656       n  |           ny  |                    |                |
657          |               |                    |                |
658          |               .---------------------                |
659          |             (xs,ys)     nx                          |
660          |            .                                        |
661          |         (gxs,gys)                                   |
662          |                                                     |
663           -----------------------------------------------------
664   */
665 
666   /*
667          nc - number of components per grid point
668          col - number of colors needed in one direction for single component problem
669 
670   */
671   ierr = DMDAGetInfo(da,&dim,&M,&N,&P,0,0,0,&dof,0,0,0,0,0);CHKERRQ(ierr);
672   ierr = DMDAGetCorners(da,0,0,0,&nx,&ny,&nz);CHKERRQ(ierr);
673   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
674   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
675   ierr = MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);CHKERRQ(ierr);
676   ierr = MatSetType(A,(const MatType)ttype);CHKERRQ(ierr);
677   ierr = MatSetDA(A,da);CHKERRQ(ierr);
678   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
679   ierr = MatGetType(A,&Atype);CHKERRQ(ierr);
680   /*
681      We do not provide a getmatrix function in the DMDA operations because
682    the basic DMDA does not know about matrices. We think of DMDA as being more
683    more low-level than matrices. This is kind of cheating but, cause sometimes
684    we think of DMDA has higher level than matrices.
685 
686      We could switch based on Atype (or mtype), but we do not since the
687    specialized setting routines depend only the particular preallocation
688    details of the matrix, not the type itself.
689   */
690   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
691   if (!aij) {
692     ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
693   }
694   if (!aij) {
695     ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
696     if (!baij) {
697       ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
698     }
699     if (!baij){
700       ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
701       if (!sbaij) {
702         ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
703       }
704     }
705   }
706   if (aij) {
707     if (dim == 1) {
708       ierr = DMGetMatrix_DA_1d_MPIAIJ(da,A);CHKERRQ(ierr);
709     } else if (dim == 2) {
710       if (dd->ofill) {
711         ierr = DMGetMatrix_DA_2d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
712       } else {
713         ierr = DMGetMatrix_DA_2d_MPIAIJ(da,A);CHKERRQ(ierr);
714       }
715     } else if (dim == 3) {
716       if (dd->ofill) {
717         ierr = DMGetMatrix_DA_3d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
718       } else {
719         ierr = DMGetMatrix_DA_3d_MPIAIJ(da,A);CHKERRQ(ierr);
720       }
721     }
722   } else if (baij) {
723     if (dim == 2) {
724       ierr = DMGetMatrix_DA_2d_MPIBAIJ(da,A);CHKERRQ(ierr);
725     } else if (dim == 3) {
726       ierr = DMGetMatrix_DA_3d_MPIBAIJ(da,A);CHKERRQ(ierr);
727     } else {
728       SETERRQ3(((PetscObject)da)->comm,PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension!\n" \
729 	       "Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
730     }
731   } else if (sbaij) {
732     if (dim == 2) {
733       ierr = DMGetMatrix_DA_2d_MPISBAIJ(da,A);CHKERRQ(ierr);
734     } else if (dim == 3) {
735       ierr = DMGetMatrix_DA_3d_MPISBAIJ(da,A);CHKERRQ(ierr);
736     } else {
737       SETERRQ3(((PetscObject)da)->comm,PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension!\n" \
738 	       "Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
739     }
740   }
741   ierr = DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);CHKERRQ(ierr);
742   ierr = MatSetStencil(A,dim,dims,starts,dof);CHKERRQ(ierr);
743   ierr = PetscObjectCompose((PetscObject)A,"DMDA",(PetscObject)da);CHKERRQ(ierr);
744   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
745   if (size > 1) {
746     /* change viewer to display matrix in natural ordering */
747     ierr = MatShellSetOperation(A, MATOP_VIEW, (void (*)(void)) MatView_MPI_DA);CHKERRQ(ierr);
748     ierr = MatShellSetOperation(A, MATOP_LOAD, (void (*)(void)) MatLoad_MPI_DA);CHKERRQ(ierr);
749   }
750   *J = A;
751   PetscFunctionReturn(0);
752 }
753 
754 /* ---------------------------------------------------------------------------------*/
755 #undef __FUNCT__
756 #define __FUNCT__ "DMGetMatrix_DA_2d_MPIAIJ"
757 PetscErrorCode DMGetMatrix_DA_2d_MPIAIJ(DM da,Mat J)
758 {
759   PetscErrorCode         ierr;
760   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = PETSC_NULL,k,nc,*rows = PETSC_NULL,col,cnt,l,p;
761   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
762   MPI_Comm               comm;
763   PetscScalar            *values;
764   DMDABoundaryType       bx,by;
765   ISLocalToGlobalMapping ltog,ltogb;
766   DMDAStencilType        st;
767   DM_DA                  *dd = (DM_DA*)da->data;
768 
769   PetscFunctionBegin;
770   /*
771          nc - number of components per grid point
772          col - number of colors needed in one direction for single component problem
773 
774   */
775   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
776   col = 2*s + 1;
777   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
778   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
779   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
780 
781   ierr = PetscMalloc2(nc,PetscInt,&rows,col*col*nc*nc,PetscInt,&cols);CHKERRQ(ierr);
782   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
783   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
784 
785   /* determine the matrix preallocation information */
786   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
787   for (i=xs; i<xs+nx; i++) {
788 
789     pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
790     pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
791 
792     for (j=ys; j<ys+ny; j++) {
793       slot = i - gxs + gnx*(j - gys);
794 
795       lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
796       lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
797 
798       cnt  = 0;
799       for (k=0; k<nc; k++) {
800 	for (l=lstart; l<lend+1; l++) {
801 	  for (p=pstart; p<pend+1; p++) {
802 	    if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
803 	      cols[cnt++]  = k + nc*(slot + gnx*l + p);
804 	    }
805 	  }
806 	}
807 	rows[k] = k + nc*(slot);
808       }
809       ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
810     }
811   }
812   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
813   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
814   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
815   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
816 
817   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
818   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
819 
820   /*
821     For each node in the grid: we get the neighbors in the local (on processor ordering
822     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
823     PETSc ordering.
824   */
825   if (!dd->prealloc_only) {
826     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
827     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
828     for (i=xs; i<xs+nx; i++) {
829 
830       pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
831       pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
832 
833       for (j=ys; j<ys+ny; j++) {
834 	slot = i - gxs + gnx*(j - gys);
835 
836 	lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
837 	lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
838 
839 	cnt  = 0;
840 	for (k=0; k<nc; k++) {
841 	  for (l=lstart; l<lend+1; l++) {
842 	    for (p=pstart; p<pend+1; p++) {
843 	      if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
844 		cols[cnt++]  = k + nc*(slot + gnx*l + p);
845 	      }
846 	    }
847 	  }
848 	  rows[k]      = k + nc*(slot);
849 	}
850 	ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
851       }
852     }
853     ierr = PetscFree(values);CHKERRQ(ierr);
854     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
855     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
856   }
857   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
858   PetscFunctionReturn(0);
859 }
860 
861 #undef __FUNCT__
862 #define __FUNCT__ "DMGetMatrix_DA_2d_MPIAIJ_Fill"
863 PetscErrorCode DMGetMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)
864 {
865   PetscErrorCode         ierr;
866   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
867   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,l,p;
868   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
869   DM_DA                  *dd = (DM_DA*)da->data;
870   PetscInt               ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
871   MPI_Comm               comm;
872   PetscScalar            *values;
873   DMDABoundaryType       bx,by;
874   ISLocalToGlobalMapping ltog,ltogb;
875   DMDAStencilType        st;
876 
877   PetscFunctionBegin;
878   /*
879          nc - number of components per grid point
880          col - number of colors needed in one direction for single component problem
881 
882   */
883   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
884   col = 2*s + 1;
885   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
886   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
887   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
888 
889   ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
890   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
891   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
892 
893   /* determine the matrix preallocation information */
894   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
895   for (i=xs; i<xs+nx; i++) {
896 
897     pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
898     pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
899 
900     for (j=ys; j<ys+ny; j++) {
901       slot = i - gxs + gnx*(j - gys);
902 
903       lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
904       lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
905 
906       for (k=0; k<nc; k++) {
907         cnt  = 0;
908 	for (l=lstart; l<lend+1; l++) {
909 	  for (p=pstart; p<pend+1; p++) {
910             if (l || p) {
911 	      if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
912                 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++)
913 		  cols[cnt++]  = ofill[ifill_col] + nc*(slot + gnx*l + p);
914 	      }
915             } else {
916 	      if (dfill) {
917 		for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++)
918 		  cols[cnt++]  = dfill[ifill_col] + nc*(slot + gnx*l + p);
919 	      } else {
920 		for (ifill_col=0; ifill_col<nc; ifill_col++)
921 		  cols[cnt++]  = ifill_col + nc*(slot + gnx*l + p);
922 	      }
923             }
924 	  }
925 	}
926 	row = k + nc*(slot);
927         ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
928       }
929     }
930   }
931   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
932   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
933   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
934   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
935   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
936 
937   /*
938     For each node in the grid: we get the neighbors in the local (on processor ordering
939     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
940     PETSc ordering.
941   */
942   if (!dd->prealloc_only) {
943     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
944     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
945     for (i=xs; i<xs+nx; i++) {
946 
947       pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
948       pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
949 
950       for (j=ys; j<ys+ny; j++) {
951 	slot = i - gxs + gnx*(j - gys);
952 
953 	lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
954 	lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
955 
956 	for (k=0; k<nc; k++) {
957 	  cnt  = 0;
958 	  for (l=lstart; l<lend+1; l++) {
959 	    for (p=pstart; p<pend+1; p++) {
960 	      if (l || p) {
961 		if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
962 		  for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++)
963 		    cols[cnt++]  = ofill[ifill_col] + nc*(slot + gnx*l + p);
964 		}
965 	      } else {
966 		if (dfill) {
967 		  for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++)
968 		    cols[cnt++]  = dfill[ifill_col] + nc*(slot + gnx*l + p);
969 		} else {
970 		  for (ifill_col=0; ifill_col<nc; ifill_col++)
971 		    cols[cnt++]  = ifill_col + nc*(slot + gnx*l + p);
972 		}
973 	      }
974 	    }
975 	  }
976 	  row  = k + nc*(slot);
977 	  ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
978 	}
979       }
980     }
981     ierr = PetscFree(values);CHKERRQ(ierr);
982     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
983     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
984   }
985   ierr = PetscFree(cols);CHKERRQ(ierr);
986   PetscFunctionReturn(0);
987 }
988 
989 /* ---------------------------------------------------------------------------------*/
990 
991 #undef __FUNCT__
992 #define __FUNCT__ "DMGetMatrix_DA_3d_MPIAIJ"
993 PetscErrorCode DMGetMatrix_DA_3d_MPIAIJ(DM da,Mat J)
994 {
995   PetscErrorCode         ierr;
996   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
997   PetscInt               m,n,dim,s,*cols = PETSC_NULL,k,nc,*rows = PETSC_NULL,col,cnt,l,p,*dnz = PETSC_NULL,*onz = PETSC_NULL;
998   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
999   MPI_Comm               comm;
1000   PetscScalar            *values;
1001   DMDABoundaryType       bx,by,bz;
1002   ISLocalToGlobalMapping ltog,ltogb;
1003   DMDAStencilType        st;
1004   DM_DA                  *dd = (DM_DA*)da->data;
1005 
1006   PetscFunctionBegin;
1007   /*
1008          nc - number of components per grid point
1009          col - number of colors needed in one direction for single component problem
1010 
1011   */
1012   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1013   col    = 2*s + 1;
1014 
1015   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1016   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1017   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1018 
1019   ierr = PetscMalloc2(nc,PetscInt,&rows,col*col*col*nc*nc,PetscInt,&cols);CHKERRQ(ierr);
1020   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1021   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1022 
1023   /* determine the matrix preallocation information */
1024   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1025   for (i=xs; i<xs+nx; i++) {
1026     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1027     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1028     for (j=ys; j<ys+ny; j++) {
1029       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1030       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1031       for (k=zs; k<zs+nz; k++) {
1032 	kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1033 	kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1034 
1035 	slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1036 
1037 	cnt  = 0;
1038 	for (l=0; l<nc; l++) {
1039 	  for (ii=istart; ii<iend+1; ii++) {
1040 	    for (jj=jstart; jj<jend+1; jj++) {
1041 	      for (kk=kstart; kk<kend+1; kk++) {
1042 		if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1043 		  cols[cnt++]  = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1044 		}
1045 	      }
1046 	    }
1047 	  }
1048 	  rows[l] = l + nc*(slot);
1049 	}
1050 	ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1051       }
1052     }
1053   }
1054   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1055   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1056   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1057   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1058   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1059   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1060 
1061   /*
1062     For each node in the grid: we get the neighbors in the local (on processor ordering
1063     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1064     PETSc ordering.
1065   */
1066   if (!dd->prealloc_only) {
1067     ierr = PetscMalloc(col*col*col*nc*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1068     ierr = PetscMemzero(values,col*col*col*nc*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1069     for (i=xs; i<xs+nx; i++) {
1070       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1071       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1072       for (j=ys; j<ys+ny; j++) {
1073 	jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1074 	jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1075 	for (k=zs; k<zs+nz; k++) {
1076 	  kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1077 	  kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1078 
1079 	  slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1080 
1081 	  cnt  = 0;
1082 	  for (l=0; l<nc; l++) {
1083 	    for (ii=istart; ii<iend+1; ii++) {
1084 	      for (jj=jstart; jj<jend+1; jj++) {
1085 		for (kk=kstart; kk<kend+1; kk++) {
1086 		  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1087 		    cols[cnt++]  = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1088 		  }
1089 		}
1090 	      }
1091 	    }
1092 	    rows[l]      = l + nc*(slot);
1093 	  }
1094 	  ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1095 	}
1096       }
1097     }
1098     ierr = PetscFree(values);CHKERRQ(ierr);
1099     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1100     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1101   }
1102   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1103   PetscFunctionReturn(0);
1104 }
1105 
1106 /* ---------------------------------------------------------------------------------*/
1107 
1108 #undef __FUNCT__
1109 #define __FUNCT__ "DMGetMatrix_DA_1d_MPIAIJ"
1110 PetscErrorCode DMGetMatrix_DA_1d_MPIAIJ(DM da,Mat J)
1111 {
1112   PetscErrorCode         ierr;
1113   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1114   PetscInt               m,dim,s,*cols = PETSC_NULL,nc,*rows = PETSC_NULL,col,cnt,l;
1115   PetscInt               istart,iend;
1116   PetscScalar            *values;
1117   DMDABoundaryType       bx;
1118   ISLocalToGlobalMapping ltog,ltogb;
1119   DM_DA                  *dd = (DM_DA*)da->data;
1120 
1121   PetscFunctionBegin;
1122   /*
1123          nc - number of components per grid point
1124          col - number of colors needed in one direction for single component problem
1125 
1126   */
1127   ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
1128   col    = 2*s + 1;
1129 
1130   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
1131   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
1132 
1133   ierr = MatSeqAIJSetPreallocation(J,col*nc,0);CHKERRQ(ierr);
1134   ierr = MatMPIAIJSetPreallocation(J,col*nc,0,col*nc,0);CHKERRQ(ierr);
1135   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1136   ierr = PetscMalloc2(nc,PetscInt,&rows,col*nc*nc,PetscInt,&cols);CHKERRQ(ierr);
1137 
1138   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1139   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1140   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1141   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1142 
1143   /*
1144     For each node in the grid: we get the neighbors in the local (on processor ordering
1145     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1146     PETSc ordering.
1147   */
1148   if (!dd->prealloc_only) {
1149     ierr = PetscMalloc(col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1150     ierr = PetscMemzero(values,col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1151     for (i=xs; i<xs+nx; i++) {
1152       istart = PetscMax(-s,gxs - i);
1153       iend   = PetscMin(s,gxs + gnx - i - 1);
1154       slot   = i - gxs;
1155 
1156       cnt  = 0;
1157       for (l=0; l<nc; l++) {
1158 	for (i1=istart; i1<iend+1; i1++) {
1159 	  cols[cnt++] = l + nc*(slot + i1);
1160 	}
1161 	rows[l]      = l + nc*(slot);
1162       }
1163       ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1164     }
1165     ierr = PetscFree(values);CHKERRQ(ierr);
1166     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1167     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1168   }
1169   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1170   PetscFunctionReturn(0);
1171 }
1172 
1173 #undef __FUNCT__
1174 #define __FUNCT__ "DMGetMatrix_DA_2d_MPIBAIJ"
1175 PetscErrorCode DMGetMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
1176 {
1177   PetscErrorCode         ierr;
1178   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1179   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1180   PetscInt               istart,iend,jstart,jend,ii,jj;
1181   MPI_Comm               comm;
1182   PetscScalar            *values;
1183   DMDABoundaryType       bx,by;
1184   DMDAStencilType        st;
1185   ISLocalToGlobalMapping ltog,ltogb;
1186   DM_DA                  *dd = (DM_DA*)da->data;
1187 
1188   PetscFunctionBegin;
1189   /*
1190      nc - number of components per grid point
1191      col - number of colors needed in one direction for single component problem
1192   */
1193   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1194   col = 2*s + 1;
1195 
1196   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1197   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1198   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1199 
1200   ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1201 
1202   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1203   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1204 
1205   /* determine the matrix preallocation information */
1206   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1207   for (i=xs; i<xs+nx; i++) {
1208     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1209     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1210     for (j=ys; j<ys+ny; j++) {
1211       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1212       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1213       slot = i - gxs + gnx*(j - gys);
1214 
1215       /* Find block columns in block row */
1216       cnt  = 0;
1217       for (ii=istart; ii<iend+1; ii++) {
1218         for (jj=jstart; jj<jend+1; jj++) {
1219           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1220             cols[cnt++]  = slot + ii + gnx*jj;
1221           }
1222         }
1223       }
1224       ierr = MatPreallocateSetLocal(ltogb,1,&slot,ltogb,cnt,cols,dnz,onz);CHKERRQ(ierr);
1225     }
1226   }
1227   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1228   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1229   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1230 
1231   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1232   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1233 
1234   /*
1235     For each node in the grid: we get the neighbors in the local (on processor ordering
1236     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1237     PETSc ordering.
1238   */
1239   if (!dd->prealloc_only) {
1240     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1241     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1242     for (i=xs; i<xs+nx; i++) {
1243       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1244       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1245       for (j=ys; j<ys+ny; j++) {
1246 	jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1247 	jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1248 	slot = i - gxs + gnx*(j - gys);
1249 	cnt  = 0;
1250         for (ii=istart; ii<iend+1; ii++) {
1251           for (jj=jstart; jj<jend+1; jj++) {
1252             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1253               cols[cnt++]  = slot + ii + gnx*jj;
1254             }
1255           }
1256         }
1257 	ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1258       }
1259     }
1260     ierr = PetscFree(values);CHKERRQ(ierr);
1261     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1262     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1263   }
1264   ierr = PetscFree(cols);CHKERRQ(ierr);
1265   PetscFunctionReturn(0);
1266 }
1267 
1268 #undef __FUNCT__
1269 #define __FUNCT__ "DMGetMatrix_DA_3d_MPIBAIJ"
1270 PetscErrorCode DMGetMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
1271 {
1272   PetscErrorCode         ierr;
1273   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1274   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1275   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1276   MPI_Comm               comm;
1277   PetscScalar            *values;
1278   DMDABoundaryType       bx,by,bz;
1279   DMDAStencilType        st;
1280   ISLocalToGlobalMapping ltog,ltogb;
1281   DM_DA                  *dd = (DM_DA*)da->data;
1282 
1283   PetscFunctionBegin;
1284   /*
1285          nc - number of components per grid point
1286          col - number of colors needed in one direction for single component problem
1287 
1288   */
1289   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1290   col    = 2*s + 1;
1291 
1292   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1293   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1294   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1295 
1296   ierr  = PetscMalloc(col*col*col*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1297 
1298   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1299   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1300 
1301   /* determine the matrix preallocation information */
1302   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1303   for (i=xs; i<xs+nx; i++) {
1304     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1305     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1306     for (j=ys; j<ys+ny; j++) {
1307       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1308       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1309       for (k=zs; k<zs+nz; k++) {
1310 	kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1311 	kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1312 
1313 	slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1314 
1315 	/* Find block columns in block row */
1316 	cnt  = 0;
1317         for (ii=istart; ii<iend+1; ii++) {
1318           for (jj=jstart; jj<jend+1; jj++) {
1319             for (kk=kstart; kk<kend+1; kk++) {
1320               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1321 		cols[cnt++]  = slot + ii + gnx*jj + gnx*gny*kk;
1322 	      }
1323 	    }
1324 	  }
1325 	}
1326 	ierr = MatPreallocateSetLocal(ltogb,1,&slot,ltogb,cnt,cols,dnz,onz);CHKERRQ(ierr);
1327       }
1328     }
1329   }
1330   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1331   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1332   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1333 
1334   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1335   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1336 
1337   /*
1338     For each node in the grid: we get the neighbors in the local (on processor ordering
1339     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1340     PETSc ordering.
1341   */
1342   if (!dd->prealloc_only) {
1343     ierr  = PetscMalloc(col*col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1344     ierr  = PetscMemzero(values,col*col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1345     for (i=xs; i<xs+nx; i++) {
1346       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1347       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1348       for (j=ys; j<ys+ny; j++) {
1349 	jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1350 	jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1351 	for (k=zs; k<zs+nz; k++) {
1352 	  kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1353 	  kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1354 
1355 	  slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1356 
1357 	  cnt  = 0;
1358           for (ii=istart; ii<iend+1; ii++) {
1359             for (jj=jstart; jj<jend+1; jj++) {
1360               for (kk=kstart; kk<kend+1; kk++) {
1361                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1362                   cols[cnt++]  = slot + ii + gnx*jj + gnx*gny*kk;
1363                 }
1364               }
1365             }
1366           }
1367 	  ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1368 	}
1369       }
1370     }
1371     ierr = PetscFree(values);CHKERRQ(ierr);
1372     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1373     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1374   }
1375   ierr = PetscFree(cols);CHKERRQ(ierr);
1376   PetscFunctionReturn(0);
1377 }
1378 
1379 #undef __FUNCT__
1380 #define __FUNCT__ "L2GFilterUpperTriangular"
1381 /*
1382   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
1383   identify in the local ordering with periodic domain.
1384 */
1385 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[])
1386 {
1387   PetscErrorCode ierr;
1388   PetscInt       i,n;
1389 
1390   PetscFunctionBegin;
1391   ierr = ISLocalToGlobalMappingApply(ltog,1,row,row);CHKERRQ(ierr);
1392   ierr = ISLocalToGlobalMappingApply(ltog,*cnt,col,col);CHKERRQ(ierr);
1393   for (i=0,n=0; i<*cnt; i++) {
1394     if (col[i] >= *row) col[n++] = col[i];
1395   }
1396   *cnt = n;
1397   PetscFunctionReturn(0);
1398 }
1399 
1400 #undef __FUNCT__
1401 #define __FUNCT__ "DMGetMatrix_DA_2d_MPISBAIJ"
1402 PetscErrorCode DMGetMatrix_DA_2d_MPISBAIJ(DM da,Mat J)
1403 {
1404   PetscErrorCode         ierr;
1405   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1406   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1407   PetscInt               istart,iend,jstart,jend,ii,jj;
1408   MPI_Comm               comm;
1409   PetscScalar            *values;
1410   DMDABoundaryType       bx,by;
1411   DMDAStencilType        st;
1412   ISLocalToGlobalMapping ltog,ltogb;
1413   DM_DA                  *dd = (DM_DA*)da->data;
1414 
1415   PetscFunctionBegin;
1416   /*
1417      nc - number of components per grid point
1418      col - number of colors needed in one direction for single component problem
1419   */
1420   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1421   col = 2*s + 1;
1422 
1423   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1424   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1425   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1426 
1427   ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1428 
1429   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1430   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1431 
1432   /* determine the matrix preallocation information */
1433   ierr = MatPreallocateSymmetricInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1434   for (i=xs; i<xs+nx; i++) {
1435     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1436     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1437     for (j=ys; j<ys+ny; j++) {
1438       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1439       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1440       slot = i - gxs + gnx*(j - gys);
1441 
1442       /* Find block columns in block row */
1443       cnt  = 0;
1444       for (ii=istart; ii<iend+1; ii++) {
1445         for (jj=jstart; jj<jend+1; jj++) {
1446           if (st == DMDA_STENCIL_BOX || !ii || !jj) {
1447             cols[cnt++]  = slot + ii + gnx*jj;
1448           }
1449         }
1450       }
1451       ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1452       ierr = MatPreallocateSymmetricSet(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
1453     }
1454   }
1455   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1456   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1457   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1458 
1459   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1460   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1461 
1462   /*
1463     For each node in the grid: we get the neighbors in the local (on processor ordering
1464     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1465     PETSc ordering.
1466   */
1467   if (!dd->prealloc_only) {
1468     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1469     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1470     for (i=xs; i<xs+nx; i++) {
1471       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1472       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1473       for (j=ys; j<ys+ny; j++) {
1474         jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1475         jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1476         slot = i - gxs + gnx*(j - gys);
1477 
1478         /* Find block columns in block row */
1479         cnt  = 0;
1480         for (ii=istart; ii<iend+1; ii++) {
1481           for (jj=jstart; jj<jend+1; jj++) {
1482             if (st == DMDA_STENCIL_BOX || !ii || !jj) {
1483               cols[cnt++]  = slot + ii + gnx*jj;
1484             }
1485           }
1486         }
1487         ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1488 	ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1489       }
1490     }
1491     ierr = PetscFree(values);CHKERRQ(ierr);
1492     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1493     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1494   }
1495   ierr = PetscFree(cols);CHKERRQ(ierr);
1496   PetscFunctionReturn(0);
1497 }
1498 
1499 #undef __FUNCT__
1500 #define __FUNCT__ "DMGetMatrix_DA_3d_MPISBAIJ"
1501 PetscErrorCode DMGetMatrix_DA_3d_MPISBAIJ(DM da,Mat J)
1502 {
1503   PetscErrorCode         ierr;
1504   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1505   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1506   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1507   MPI_Comm               comm;
1508   PetscScalar            *values;
1509   DMDABoundaryType       bx,by,bz;
1510   DMDAStencilType        st;
1511   ISLocalToGlobalMapping ltog,ltogb;
1512   DM_DA                  *dd = (DM_DA*)da->data;
1513 
1514   PetscFunctionBegin;
1515   /*
1516      nc - number of components per grid point
1517      col - number of colors needed in one direction for single component problem
1518   */
1519   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1520   col = 2*s + 1;
1521 
1522   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1523   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1524   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1525 
1526   /* create the matrix */
1527   ierr = PetscMalloc(col*col*col*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1528 
1529   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1530   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1531 
1532   /* determine the matrix preallocation information */
1533   ierr = MatPreallocateSymmetricInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1534   for (i=xs; i<xs+nx; i++) {
1535     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1536     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1537     for (j=ys; j<ys+ny; j++) {
1538       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1539       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1540       for (k=zs; k<zs+nz; k++) {
1541         kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1542 	kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1543 
1544 	slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1545 
1546 	/* Find block columns in block row */
1547 	cnt  = 0;
1548         for (ii=istart; ii<iend+1; ii++) {
1549           for (jj=jstart; jj<jend+1; jj++) {
1550             for (kk=kstart; kk<kend+1; kk++) {
1551               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
1552                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1553               }
1554             }
1555           }
1556         }
1557         ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1558         ierr = MatPreallocateSymmetricSet(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
1559       }
1560     }
1561   }
1562   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1563   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1564   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1565 
1566   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1567   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1568 
1569   /*
1570     For each node in the grid: we get the neighbors in the local (on processor ordering
1571     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1572     PETSc ordering.
1573   */
1574   if (!dd->prealloc_only) {
1575     ierr = PetscMalloc(col*col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1576     ierr = PetscMemzero(values,col*col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1577     for (i=xs; i<xs+nx; i++) {
1578       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1579       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1580       for (j=ys; j<ys+ny; j++) {
1581         jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1582 	jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1583 	for (k=zs; k<zs+nz; k++) {
1584           kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1585 	  kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1586 
1587 	  slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1588 
1589 	  cnt  = 0;
1590           for (ii=istart; ii<iend+1; ii++) {
1591             for (jj=jstart; jj<jend+1; jj++) {
1592               for (kk=kstart; kk<kend+1; kk++) {
1593                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
1594 		  cols[cnt++]  = slot + ii + gnx*jj + gnx*gny*kk;
1595 		}
1596 	      }
1597 	    }
1598 	  }
1599           ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1600           ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1601 	}
1602       }
1603     }
1604     ierr = PetscFree(values);CHKERRQ(ierr);
1605     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1606     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1607   }
1608   ierr = PetscFree(cols);CHKERRQ(ierr);
1609   PetscFunctionReturn(0);
1610 }
1611 
1612 /* ---------------------------------------------------------------------------------*/
1613 
1614 #undef __FUNCT__
1615 #define __FUNCT__ "DMGetMatrix_DA_3d_MPIAIJ_Fill"
1616 PetscErrorCode DMGetMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J)
1617 {
1618   PetscErrorCode         ierr;
1619   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1620   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,l,p,*dnz,*onz;
1621   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1622   DM_DA                  *dd = (DM_DA*)da->data;
1623   PetscInt               ifill_col,*dfill = dd->dfill,*ofill = dd->ofill;
1624   MPI_Comm               comm;
1625   PetscScalar            *values;
1626   DMDABoundaryType       bx,by,bz;
1627   ISLocalToGlobalMapping ltog,ltogb;
1628   DMDAStencilType        st;
1629 
1630   PetscFunctionBegin;
1631   /*
1632          nc - number of components per grid point
1633          col - number of colors needed in one direction for single component problem
1634 
1635   */
1636   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1637   col    = 2*s + 1;
1638   if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)){
1639     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
1640                  by 2*stencil_width + 1\n");
1641   }
1642   if (by == DMDA_BOUNDARY_PERIODIC && (n % col)){
1643     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
1644                  by 2*stencil_width + 1\n");
1645   }
1646   if (bz == DMDA_BOUNDARY_PERIODIC && (p % col)){
1647     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
1648                  by 2*stencil_width + 1\n");
1649   }
1650 
1651   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1652   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1653   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1654 
1655   ierr = PetscMalloc(col*col*col*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1656   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1657   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1658 
1659   /* determine the matrix preallocation information */
1660   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1661 
1662 
1663   for (i=xs; i<xs+nx; i++) {
1664     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1665     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1666     for (j=ys; j<ys+ny; j++) {
1667       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1668       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1669       for (k=zs; k<zs+nz; k++) {
1670         kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1671         kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1672 
1673         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1674 
1675 	for (l=0; l<nc; l++) {
1676 	  cnt  = 0;
1677 	  for (ii=istart; ii<iend+1; ii++) {
1678 	    for (jj=jstart; jj<jend+1; jj++) {
1679 	      for (kk=kstart; kk<kend+1; kk++) {
1680 		if (ii || jj || kk) {
1681 		  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1682 		    for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++)
1683 		      cols[cnt++]  = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1684 		  }
1685 		} else {
1686 		  if (dfill) {
1687 		    for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++)
1688 		      cols[cnt++]  = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1689 		  } else {
1690 		    for (ifill_col=0; ifill_col<nc; ifill_col++)
1691 		      cols[cnt++]  = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1692 		  }
1693 		}
1694 	      }
1695 	    }
1696 	  }
1697 	  row  = l + nc*(slot);
1698 	  ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1699 	}
1700       }
1701     }
1702   }
1703   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1704   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1705   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1706   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1707   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1708 
1709   /*
1710     For each node in the grid: we get the neighbors in the local (on processor ordering
1711     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1712     PETSc ordering.
1713   */
1714   if (!dd->prealloc_only) {
1715     ierr = PetscMalloc(col*col*col*nc*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1716     ierr = PetscMemzero(values,col*col*col*nc*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1717     for (i=xs; i<xs+nx; i++) {
1718       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1719       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1720       for (j=ys; j<ys+ny; j++) {
1721 	jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1722 	jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1723 	for (k=zs; k<zs+nz; k++) {
1724 	  kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1725 	  kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1726 
1727 	  slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1728 
1729 	  for (l=0; l<nc; l++) {
1730 	    cnt  = 0;
1731 	    for (ii=istart; ii<iend+1; ii++) {
1732 	      for (jj=jstart; jj<jend+1; jj++) {
1733 		for (kk=kstart; kk<kend+1; kk++) {
1734 		  if (ii || jj || kk) {
1735 		    if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1736 		      for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++)
1737 			cols[cnt++]  = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1738 		    }
1739 		  } else {
1740 		    if (dfill) {
1741 		      for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++)
1742 			cols[cnt++]  = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1743 		    } else {
1744 		      for (ifill_col=0; ifill_col<nc; ifill_col++)
1745 			cols[cnt++]  = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1746 		    }
1747 		  }
1748 		}
1749 	      }
1750 	    }
1751 	    row  = l + nc*(slot);
1752 	    ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1753 	  }
1754 	}
1755       }
1756     }
1757     ierr = PetscFree(values);CHKERRQ(ierr);
1758     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1759     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1760   }
1761   ierr = PetscFree(cols);CHKERRQ(ierr);
1762   PetscFunctionReturn(0);
1763 }
1764