xref: /petsc/src/dm/impls/da/fdda.c (revision 6a4bb4b043f38746f1d570dcb66790e05f8eeea6)
1 
2 #include <private/daimpl.h> /*I      "petscdmda.h"     I*/
3 #include <petscmat.h>         /*I      "petscmat.h"    I*/
4 #include <private/matimpl.h>
5 
6 extern PetscErrorCode DMGetColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring *);
7 extern PetscErrorCode DMGetColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring *);
8 extern PetscErrorCode DMGetColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring *);
9 extern PetscErrorCode DMGetColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring *);
10 
11 /*
12    For ghost i that may be negative or greater than the upper bound this
13   maps it into the 0:m-1 range using periodicity
14 */
15 #define SetInRange(i,m) ((i < 0) ? m+i:((i >= m) ? i-m:i))
16 
17 #undef __FUNCT__
18 #define __FUNCT__ "DMDASetBlockFills_Private"
19 static PetscErrorCode DMDASetBlockFills_Private(PetscInt *dfill,PetscInt w,PetscInt **rfill)
20 {
21   PetscErrorCode ierr;
22   PetscInt       i,j,nz,*fill;
23 
24   PetscFunctionBegin;
25   if (!dfill) PetscFunctionReturn(0);
26 
27   /* count number nonzeros */
28   nz = 0;
29   for (i=0; i<w; i++) {
30     for (j=0; j<w; j++) {
31       if (dfill[w*i+j]) nz++;
32     }
33   }
34   ierr = PetscMalloc((nz + w + 1)*sizeof(PetscInt),&fill);CHKERRQ(ierr);
35   /* construct modified CSR storage of nonzero structure */
36   nz = w + 1;
37   for (i=0; i<w; i++) {
38     fill[i] = nz;
39     for (j=0; j<w; j++) {
40       if (dfill[w*i+j]) {
41 	fill[nz] = j;
42 	nz++;
43       }
44     }
45   }
46   fill[w] = nz;
47 
48   *rfill = fill;
49   PetscFunctionReturn(0);
50 }
51 
52 #undef __FUNCT__
53 #define __FUNCT__ "DMDASetBlockFills"
54 /*@
55     DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
56     of the matrix returned by DMGetMatrix().
57 
58     Logically Collective on DMDA
59 
60     Input Parameter:
61 +   da - the distributed array
62 .   dfill - the fill pattern in the diagonal block (may be PETSC_NULL, means use dense block)
63 -   ofill - the fill pattern in the off-diagonal blocks
64 
65 
66     Level: developer
67 
68     Notes: This only makes sense when you are doing multicomponent problems but using the
69        MPIAIJ matrix format
70 
71            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
72        representing coupling and 0 entries for missing coupling. For example
73 $             dfill[9] = {1, 0, 0,
74 $                         1, 1, 0,
75 $                         0, 1, 1}
76        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
77        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
78        diagonal block).
79 
80      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
81      can be represented in the dfill, ofill format
82 
83    Contributed by Glenn Hammond
84 
85 .seealso DMGetMatrix(), DMDASetGetMatrix(), DMDASetMatPreallocateOnly()
86 
87 @*/
88 PetscErrorCode  DMDASetBlockFills(DM da,PetscInt *dfill,PetscInt *ofill)
89 {
90   DM_DA          *dd = (DM_DA*)da->data;
91   PetscErrorCode ierr;
92 
93   PetscFunctionBegin;
94   ierr = DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);CHKERRQ(ierr);
95   ierr = DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);CHKERRQ(ierr);
96   PetscFunctionReturn(0);
97 }
98 
99 
100 #undef __FUNCT__
101 #define __FUNCT__ "DMGetColoring_DA"
102 PetscErrorCode  DMGetColoring_DA(DM da,ISColoringType ctype,const MatType mtype,ISColoring *coloring)
103 {
104   PetscErrorCode   ierr;
105   PetscInt         dim,m,n,p,nc;
106   DMDABoundaryType bx,by,bz;
107   MPI_Comm         comm;
108   PetscMPIInt      size;
109   PetscBool        isBAIJ;
110   DM_DA            *dd = (DM_DA*)da->data;
111 
112   PetscFunctionBegin;
113   /*
114                                   m
115           ------------------------------------------------------
116          |                                                     |
117          |                                                     |
118          |               ----------------------                |
119          |               |                    |                |
120       n  |           yn  |                    |                |
121          |               |                    |                |
122          |               .---------------------                |
123          |             (xs,ys)     xn                          |
124          |            .                                        |
125          |         (gxs,gys)                                   |
126          |                                                     |
127           -----------------------------------------------------
128   */
129 
130   /*
131          nc - number of components per grid point
132          col - number of colors needed in one direction for single component problem
133 
134   */
135   ierr = DMDAGetInfo(da,&dim,0,0,0,&m,&n,&p,&nc,0,&bx,&by,&bz,0);CHKERRQ(ierr);
136 
137   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
138   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
139   if (ctype == IS_COLORING_GHOSTED){
140     if (size == 1) {
141       ctype = IS_COLORING_GLOBAL;
142     } else if (dim > 1){
143       if ((m==1 && bx == DMDA_BOUNDARY_PERIODIC) || (n==1 && by == DMDA_BOUNDARY_PERIODIC) || (p==1 && bz == DMDA_BOUNDARY_PERIODIC)){
144         SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"IS_COLORING_GHOSTED cannot be used for periodic boundary condition having both ends of the domain  on the same process");
145       }
146     }
147   }
148 
149   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
150      matrices is for the blocks, not the individual matrix elements  */
151   ierr = PetscStrcmp(mtype,MATBAIJ,&isBAIJ);CHKERRQ(ierr);
152   if (!isBAIJ) {ierr = PetscStrcmp(mtype,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);}
153   if (!isBAIJ) {ierr = PetscStrcmp(mtype,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);}
154   if (isBAIJ) {
155     dd->w = 1;
156     dd->xs = dd->xs/nc;
157     dd->xe = dd->xe/nc;
158     dd->Xs = dd->Xs/nc;
159     dd->Xe = dd->Xe/nc;
160   }
161 
162   /*
163      We do not provide a getcoloring function in the DMDA operations because
164    the basic DMDA does not know about matrices. We think of DMDA as being more
165    more low-level then matrices.
166   */
167   if (dim == 1) {
168     ierr = DMGetColoring_DA_1d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
169   } else if (dim == 2) {
170     ierr =  DMGetColoring_DA_2d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
171   } else if (dim == 3) {
172     ierr =  DMGetColoring_DA_3d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
173   } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
174   if (isBAIJ) {
175     dd->w = nc;
176     dd->xs = dd->xs*nc;
177     dd->xe = dd->xe*nc;
178     dd->Xs = dd->Xs*nc;
179     dd->Xe = dd->Xe*nc;
180   }
181   PetscFunctionReturn(0);
182 }
183 
184 /* ---------------------------------------------------------------------------------*/
185 
186 #undef __FUNCT__
187 #define __FUNCT__ "DMGetColoring_DA_2d_MPIAIJ"
188 PetscErrorCode DMGetColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
189 {
190   PetscErrorCode         ierr;
191   PetscInt               xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
192   PetscInt               ncolors;
193   MPI_Comm               comm;
194   DMDABoundaryType       bx,by;
195   DMDAStencilType        st;
196   ISColoringValue        *colors;
197   DM_DA                  *dd = (DM_DA*)da->data;
198 
199   PetscFunctionBegin;
200   /*
201          nc - number of components per grid point
202          col - number of colors needed in one direction for single component problem
203 
204   */
205   ierr = DMDAGetInfo(da,&dim,&m,&n,0,&M,&N,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
206   col    = 2*s + 1;
207   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
208   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
209   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
210 
211   /* special case as taught to us by Paul Hovland */
212   if (st == DMDA_STENCIL_STAR && s == 1) {
213     ierr = DMGetColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
214   } else {
215 
216     if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)){
217       SETERRQ2(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X (%d) is divisible\n\
218                  by 2*stencil_width + 1 (%d)\n", m, col);
219     }
220     if (by == DMDA_BOUNDARY_PERIODIC && (n % col)){
221       SETERRQ2(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y (%d) is divisible\n\
222                  by 2*stencil_width + 1 (%d)\n", n, col);
223     }
224     if (ctype == IS_COLORING_GLOBAL) {
225       if (!dd->localcoloring) {
226 	ierr = PetscMalloc(nc*nx*ny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
227 	ii = 0;
228 	for (j=ys; j<ys+ny; j++) {
229 	  for (i=xs; i<xs+nx; i++) {
230 	    for (k=0; k<nc; k++) {
231 	      colors[ii++] = k + nc*((i % col) + col*(j % col));
232 	    }
233 	  }
234 	}
235         ncolors = nc + nc*(col-1 + col*(col-1));
236 	ierr = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,&dd->localcoloring);CHKERRQ(ierr);
237       }
238       *coloring = dd->localcoloring;
239     } else if (ctype == IS_COLORING_GHOSTED) {
240       if (!dd->ghostedcoloring) {
241 	ierr = PetscMalloc(nc*gnx*gny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
242 	ii = 0;
243 	for (j=gys; j<gys+gny; j++) {
244 	  for (i=gxs; i<gxs+gnx; i++) {
245 	    for (k=0; k<nc; k++) {
246 	      /* the complicated stuff is to handle periodic boundaries */
247 	      colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
248 	    }
249 	  }
250 	}
251         ncolors = nc + nc*(col - 1 + col*(col-1));
252 	ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
253         /* PetscIntView(ncolors,(PetscInt *)colors,0); */
254 
255 	ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
256       }
257       *coloring = dd->ghostedcoloring;
258     } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
259   }
260   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
261   PetscFunctionReturn(0);
262 }
263 
264 /* ---------------------------------------------------------------------------------*/
265 
266 #undef __FUNCT__
267 #define __FUNCT__ "DMGetColoring_DA_3d_MPIAIJ"
268 PetscErrorCode DMGetColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
269 {
270   PetscErrorCode    ierr;
271   PetscInt          xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
272   PetscInt          ncolors;
273   MPI_Comm          comm;
274   DMDABoundaryType  bx,by,bz;
275   DMDAStencilType   st;
276   ISColoringValue   *colors;
277   DM_DA             *dd = (DM_DA*)da->data;
278 
279   PetscFunctionBegin;
280   /*
281          nc - number of components per grid point
282          col - number of colors needed in one direction for single component problem
283 
284   */
285   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
286   col    = 2*s + 1;
287   if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)){
288     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
289                  by 2*stencil_width + 1\n");
290   }
291   if (by == DMDA_BOUNDARY_PERIODIC && (n % col)){
292     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
293                  by 2*stencil_width + 1\n");
294   }
295   if (bz == DMDA_BOUNDARY_PERIODIC && (p % col)){
296     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
297                  by 2*stencil_width + 1\n");
298   }
299 
300   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
301   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
302   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
303 
304   /* create the coloring */
305   if (ctype == IS_COLORING_GLOBAL) {
306     if (!dd->localcoloring) {
307       ierr = PetscMalloc(nc*nx*ny*nz*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
308       ii = 0;
309       for (k=zs; k<zs+nz; k++) {
310         for (j=ys; j<ys+ny; j++) {
311           for (i=xs; i<xs+nx; i++) {
312             for (l=0; l<nc; l++) {
313               colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
314             }
315           }
316         }
317       }
318       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
319       ierr = ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,&dd->localcoloring);CHKERRQ(ierr);
320     }
321     *coloring = dd->localcoloring;
322   } else if (ctype == IS_COLORING_GHOSTED) {
323     if (!dd->ghostedcoloring) {
324       ierr = PetscMalloc(nc*gnx*gny*gnz*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
325       ii = 0;
326       for (k=gzs; k<gzs+gnz; k++) {
327         for (j=gys; j<gys+gny; j++) {
328           for (i=gxs; i<gxs+gnx; i++) {
329             for (l=0; l<nc; l++) {
330               /* the complicated stuff is to handle periodic boundaries */
331               colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
332             }
333           }
334         }
335       }
336       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
337       ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
338       ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
339     }
340     *coloring = dd->ghostedcoloring;
341   } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
342   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
343   PetscFunctionReturn(0);
344 }
345 
346 /* ---------------------------------------------------------------------------------*/
347 
348 #undef __FUNCT__
349 #define __FUNCT__ "DMGetColoring_DA_1d_MPIAIJ"
350 PetscErrorCode DMGetColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
351 {
352   PetscErrorCode    ierr;
353   PetscInt          xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
354   PetscInt          ncolors;
355   MPI_Comm          comm;
356   DMDABoundaryType  bx;
357   ISColoringValue   *colors;
358   DM_DA             *dd = (DM_DA*)da->data;
359 
360   PetscFunctionBegin;
361   /*
362          nc - number of components per grid point
363          col - number of colors needed in one direction for single component problem
364 
365   */
366   ierr = DMDAGetInfo(da,&dim,&m,0,0,&M,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
367   col    = 2*s + 1;
368 
369   if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)) {
370     SETERRQ2(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points %d is divisible\n\
371                  by 2*stencil_width + 1 %d\n",(int)m,(int)col);
372   }
373 
374   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
375   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
376   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
377 
378   /* create the coloring */
379   if (ctype == IS_COLORING_GLOBAL) {
380     if (!dd->localcoloring) {
381       ierr = PetscMalloc(nc*nx*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
382       i1 = 0;
383       for (i=xs; i<xs+nx; i++) {
384         for (l=0; l<nc; l++) {
385           colors[i1++] = l + nc*(i % col);
386         }
387       }
388       ncolors = nc + nc*(col-1);
389       ierr = ISColoringCreate(comm,ncolors,nc*nx,colors,&dd->localcoloring);CHKERRQ(ierr);
390     }
391     *coloring = dd->localcoloring;
392   } else if (ctype == IS_COLORING_GHOSTED) {
393     if (!dd->ghostedcoloring) {
394       ierr = PetscMalloc(nc*gnx*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
395       i1 = 0;
396       for (i=gxs; i<gxs+gnx; i++) {
397         for (l=0; l<nc; l++) {
398           /* the complicated stuff is to handle periodic boundaries */
399           colors[i1++] = l + nc*(SetInRange(i,m) % col);
400         }
401       }
402       ncolors = nc + nc*(col-1);
403       ierr = ISColoringCreate(comm,ncolors,nc*gnx,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
404       ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
405     }
406     *coloring = dd->ghostedcoloring;
407   } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
408   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
409   PetscFunctionReturn(0);
410 }
411 
412 #undef __FUNCT__
413 #define __FUNCT__ "DMGetColoring_DA_2d_5pt_MPIAIJ"
414 PetscErrorCode DMGetColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
415 {
416   PetscErrorCode    ierr;
417   PetscInt          xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
418   PetscInt          ncolors;
419   MPI_Comm          comm;
420   DMDABoundaryType  bx,by;
421   ISColoringValue   *colors;
422   DM_DA             *dd = (DM_DA*)da->data;
423 
424   PetscFunctionBegin;
425   /*
426          nc - number of components per grid point
427          col - number of colors needed in one direction for single component problem
428 
429   */
430   ierr   = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,0);CHKERRQ(ierr);
431   ierr   = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
432   ierr   = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
433   ierr   = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
434 
435   if (bx == DMDA_BOUNDARY_PERIODIC && (m % 5)){
436     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
437                  by 5\n");
438   }
439   if (by == DMDA_BOUNDARY_PERIODIC && (n % 5)){
440     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
441                  by 5\n");
442   }
443 
444   /* create the coloring */
445   if (ctype == IS_COLORING_GLOBAL) {
446     if (!dd->localcoloring) {
447       ierr = PetscMalloc(nc*nx*ny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
448       ii = 0;
449       for (j=ys; j<ys+ny; j++) {
450 	for (i=xs; i<xs+nx; i++) {
451 	  for (k=0; k<nc; k++) {
452 	    colors[ii++] = k + nc*((3*j+i) % 5);
453 	  }
454 	}
455       }
456       ncolors = 5*nc;
457       ierr = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,&dd->localcoloring);CHKERRQ(ierr);
458     }
459     *coloring = dd->localcoloring;
460   } else if (ctype == IS_COLORING_GHOSTED) {
461     if (!dd->ghostedcoloring) {
462       ierr = PetscMalloc(nc*gnx*gny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
463       ii = 0;
464       for (j=gys; j<gys+gny; j++) {
465 	for (i=gxs; i<gxs+gnx; i++) {
466 	  for (k=0; k<nc; k++) {
467 	    colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
468 	  }
469 	}
470       }
471       ncolors = 5*nc;
472       ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
473       ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
474     }
475     *coloring = dd->ghostedcoloring;
476   } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
477   PetscFunctionReturn(0);
478 }
479 
480 /* =========================================================================== */
481 extern PetscErrorCode DMGetMatrix_DA_1d_MPIAIJ(DM,Mat);
482 extern PetscErrorCode DMGetMatrix_DA_2d_MPIAIJ(DM,Mat);
483 extern PetscErrorCode DMGetMatrix_DA_2d_MPIAIJ_Fill(DM,Mat);
484 extern PetscErrorCode DMGetMatrix_DA_3d_MPIAIJ(DM,Mat);
485 extern PetscErrorCode DMGetMatrix_DA_3d_MPIAIJ_Fill(DM,Mat);
486 extern PetscErrorCode DMGetMatrix_DA_2d_MPIBAIJ(DM,Mat);
487 extern PetscErrorCode DMGetMatrix_DA_3d_MPIBAIJ(DM,Mat);
488 extern PetscErrorCode DMGetMatrix_DA_2d_MPISBAIJ(DM,Mat);
489 extern PetscErrorCode DMGetMatrix_DA_3d_MPISBAIJ(DM,Mat);
490 
491 #undef __FUNCT__
492 #define __FUNCT__ "MatSetDM"
493 /*@
494    MatSetDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix
495 
496    Logically Collective on Mat
497 
498    Input Parameters:
499 +  mat - the matrix
500 -  da - the da
501 
502    Level: intermediate
503 
504 @*/
505 PetscErrorCode  MatSetDM(Mat mat,DM da)
506 {
507   PetscErrorCode ierr;
508 
509   PetscFunctionBegin;
510   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
511   PetscValidHeaderSpecific(da,DM_CLASSID,1);
512   ierr = PetscTryMethod(mat,"MatSetDM_C",(Mat,DM),(mat,da));CHKERRQ(ierr);
513   PetscFunctionReturn(0);
514 }
515 
516 EXTERN_C_BEGIN
517 #undef __FUNCT__
518 #define __FUNCT__ "MatView_MPI_DA"
519 PetscErrorCode  MatView_MPI_DA(Mat A,PetscViewer viewer)
520 {
521   DM             da;
522   PetscErrorCode ierr;
523   const char     *prefix;
524   Mat            Anatural;
525   AO             ao;
526   PetscInt       rstart,rend,*petsc,i;
527   IS             is;
528   MPI_Comm       comm;
529   PetscViewerFormat format;
530 
531   PetscFunctionBegin;
532   /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
533   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
534   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0);
535 
536   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
537   ierr = PetscObjectQuery((PetscObject)A,"DM",(PetscObject*)&da);CHKERRQ(ierr);
538   if (!da) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
539 
540   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
541   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
542   ierr = PetscMalloc((rend-rstart)*sizeof(PetscInt),&petsc);CHKERRQ(ierr);
543   for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
544   ierr = AOApplicationToPetsc(ao,rend-rstart,petsc);CHKERRQ(ierr);
545   ierr = ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
546 
547   /* call viewer on natural ordering */
548   ierr = MatGetSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);CHKERRQ(ierr);
549   ierr = ISDestroy(&is);CHKERRQ(ierr);
550   ierr = PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);CHKERRQ(ierr);
551   ierr = PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);CHKERRQ(ierr);
552   ierr = PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);CHKERRQ(ierr);
553   ierr = MatView(Anatural,viewer);CHKERRQ(ierr);
554   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
555   PetscFunctionReturn(0);
556 }
557 EXTERN_C_END
558 
559 EXTERN_C_BEGIN
560 #undef __FUNCT__
561 #define __FUNCT__ "MatLoad_MPI_DA"
562 PetscErrorCode  MatLoad_MPI_DA(Mat A,PetscViewer viewer)
563 {
564   DM             da;
565   PetscErrorCode ierr;
566   Mat            Anatural,Aapp;
567   AO             ao;
568   PetscInt       rstart,rend,*app,i;
569   IS             is;
570   MPI_Comm       comm;
571 
572   PetscFunctionBegin;
573   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
574   ierr = PetscObjectQuery((PetscObject)A,"DM",(PetscObject*)&da);CHKERRQ(ierr);
575   if (!da) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
576 
577   /* Load the matrix in natural ordering */
578   ierr = MatCreate(((PetscObject)A)->comm,&Anatural);CHKERRQ(ierr);
579   ierr = MatSetType(Anatural,((PetscObject)A)->type_name);CHKERRQ(ierr);
580   ierr = MatSetSizes(Anatural,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
581   ierr = MatLoad(Anatural,viewer);CHKERRQ(ierr);
582 
583   /* Map natural ordering to application ordering and create IS */
584   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
585   ierr = MatGetOwnershipRange(Anatural,&rstart,&rend);CHKERRQ(ierr);
586   ierr = PetscMalloc((rend-rstart)*sizeof(PetscInt),&app);CHKERRQ(ierr);
587   for (i=rstart; i<rend; i++) app[i-rstart] = i;
588   ierr = AOPetscToApplication(ao,rend-rstart,app);CHKERRQ(ierr);
589   ierr = ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
590 
591   /* Do permutation and replace header */
592   ierr = MatGetSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);CHKERRQ(ierr);
593   ierr = MatHeaderReplace(A,Aapp);CHKERRQ(ierr);
594   ierr = ISDestroy(&is);CHKERRQ(ierr);
595   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
596   PetscFunctionReturn(0);
597 }
598 EXTERN_C_END
599 
600 #undef __FUNCT__
601 #define __FUNCT__ "DMGetMatrix_DA"
602 PetscErrorCode  DMGetMatrix_DA(DM da, const MatType mtype,Mat *J)
603 {
604   PetscErrorCode ierr;
605   PetscInt       dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
606   Mat            A;
607   MPI_Comm       comm;
608   const MatType  Atype;
609   void           (*aij)(void)=PETSC_NULL,(*baij)(void)=PETSC_NULL,(*sbaij)(void)=PETSC_NULL;
610   MatType        ttype[256];
611   PetscBool      flg;
612   PetscMPIInt    size;
613   DM_DA          *dd = (DM_DA*)da->data;
614 
615   PetscFunctionBegin;
616 #ifndef PETSC_USE_DYNAMIC_LIBRARIES
617   ierr = MatInitializePackage(PETSC_NULL);CHKERRQ(ierr);
618 #endif
619   if (!mtype) mtype = MATAIJ;
620   ierr = PetscStrcpy((char*)ttype,mtype);CHKERRQ(ierr);
621   ierr = PetscOptionsBegin(((PetscObject)da)->comm,((PetscObject)da)->prefix,"DMDA options","Mat");CHKERRQ(ierr);
622   ierr = PetscOptionsList("-da_mat_type","Matrix type","MatSetType",MatList,mtype,(char*)ttype,256,&flg);CHKERRQ(ierr);
623   ierr = PetscOptionsEnd();
624 
625   /*
626                                   m
627           ------------------------------------------------------
628          |                                                     |
629          |                                                     |
630          |               ----------------------                |
631          |               |                    |                |
632       n  |           ny  |                    |                |
633          |               |                    |                |
634          |               .---------------------                |
635          |             (xs,ys)     nx                          |
636          |            .                                        |
637          |         (gxs,gys)                                   |
638          |                                                     |
639           -----------------------------------------------------
640   */
641 
642   /*
643          nc - number of components per grid point
644          col - number of colors needed in one direction for single component problem
645 
646   */
647   ierr = DMDAGetInfo(da,&dim,&M,&N,&P,0,0,0,&dof,0,0,0,0,0);CHKERRQ(ierr);
648   ierr = DMDAGetCorners(da,0,0,0,&nx,&ny,&nz);CHKERRQ(ierr);
649   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
650   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
651   ierr = MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);CHKERRQ(ierr);
652   ierr = MatSetType(A,(const MatType)ttype);CHKERRQ(ierr);
653   ierr = MatSetDM(A,da);CHKERRQ(ierr);
654   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
655   ierr = MatGetType(A,&Atype);CHKERRQ(ierr);
656   /*
657      We do not provide a getmatrix function in the DMDA operations because
658    the basic DMDA does not know about matrices. We think of DMDA as being more
659    more low-level than matrices. This is kind of cheating but, cause sometimes
660    we think of DMDA has higher level than matrices.
661 
662      We could switch based on Atype (or mtype), but we do not since the
663    specialized setting routines depend only the particular preallocation
664    details of the matrix, not the type itself.
665   */
666   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
667   if (!aij) {
668     ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
669   }
670   if (!aij) {
671     ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
672     if (!baij) {
673       ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
674     }
675     if (!baij){
676       ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
677       if (!sbaij) {
678         ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
679       }
680     }
681   }
682   if (aij) {
683     if (dim == 1) {
684       ierr = DMGetMatrix_DA_1d_MPIAIJ(da,A);CHKERRQ(ierr);
685     } else if (dim == 2) {
686       if (dd->ofill) {
687         ierr = DMGetMatrix_DA_2d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
688       } else {
689         ierr = DMGetMatrix_DA_2d_MPIAIJ(da,A);CHKERRQ(ierr);
690       }
691     } else if (dim == 3) {
692       if (dd->ofill) {
693         ierr = DMGetMatrix_DA_3d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
694       } else {
695         ierr = DMGetMatrix_DA_3d_MPIAIJ(da,A);CHKERRQ(ierr);
696       }
697     }
698   } else if (baij) {
699     if (dim == 2) {
700       ierr = DMGetMatrix_DA_2d_MPIBAIJ(da,A);CHKERRQ(ierr);
701     } else if (dim == 3) {
702       ierr = DMGetMatrix_DA_3d_MPIBAIJ(da,A);CHKERRQ(ierr);
703     } else {
704       SETERRQ3(((PetscObject)da)->comm,PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension!\n" \
705 	       "Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
706     }
707   } else if (sbaij) {
708     if (dim == 2) {
709       ierr = DMGetMatrix_DA_2d_MPISBAIJ(da,A);CHKERRQ(ierr);
710     } else if (dim == 3) {
711       ierr = DMGetMatrix_DA_3d_MPISBAIJ(da,A);CHKERRQ(ierr);
712     } else {
713       SETERRQ3(((PetscObject)da)->comm,PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension!\n" \
714 	       "Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
715     }
716   } else {
717     ISLocalToGlobalMapping ltog,ltogb;
718     ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
719     ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
720     ierr = MatSetLocalToGlobalMapping(A,ltog,ltog);CHKERRQ(ierr);
721     ierr = MatSetLocalToGlobalMappingBlock(A,ltogb,ltogb);CHKERRQ(ierr);
722   }
723   ierr = DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);CHKERRQ(ierr);
724   ierr = MatSetStencil(A,dim,dims,starts,dof);CHKERRQ(ierr);
725   ierr = PetscObjectCompose((PetscObject)A,"DM",(PetscObject)da);CHKERRQ(ierr);
726   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
727   if (size > 1) {
728     /* change viewer to display matrix in natural ordering */
729     ierr = MatShellSetOperation(A, MATOP_VIEW, (void (*)(void)) MatView_MPI_DA);CHKERRQ(ierr);
730     ierr = MatShellSetOperation(A, MATOP_LOAD, (void (*)(void)) MatLoad_MPI_DA);CHKERRQ(ierr);
731   }
732   *J = A;
733   PetscFunctionReturn(0);
734 }
735 
736 /* ---------------------------------------------------------------------------------*/
737 #undef __FUNCT__
738 #define __FUNCT__ "DMGetMatrix_DA_2d_MPIAIJ"
739 PetscErrorCode DMGetMatrix_DA_2d_MPIAIJ(DM da,Mat J)
740 {
741   PetscErrorCode         ierr;
742   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = PETSC_NULL,k,nc,*rows = PETSC_NULL,col,cnt,l,p;
743   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
744   MPI_Comm               comm;
745   PetscScalar            *values;
746   DMDABoundaryType       bx,by;
747   ISLocalToGlobalMapping ltog,ltogb;
748   DMDAStencilType        st;
749 
750   PetscFunctionBegin;
751   /*
752          nc - number of components per grid point
753          col - number of colors needed in one direction for single component problem
754 
755   */
756   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
757   col = 2*s + 1;
758   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
759   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
760   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
761 
762   ierr = PetscMalloc2(nc,PetscInt,&rows,col*col*nc*nc,PetscInt,&cols);CHKERRQ(ierr);
763   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
764   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
765 
766   /* determine the matrix preallocation information */
767   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
768   for (i=xs; i<xs+nx; i++) {
769 
770     pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
771     pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
772 
773     for (j=ys; j<ys+ny; j++) {
774       slot = i - gxs + gnx*(j - gys);
775 
776       lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
777       lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
778 
779       cnt  = 0;
780       for (k=0; k<nc; k++) {
781 	for (l=lstart; l<lend+1; l++) {
782 	  for (p=pstart; p<pend+1; p++) {
783 	    if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
784 	      cols[cnt++]  = k + nc*(slot + gnx*l + p);
785 	    }
786 	  }
787 	}
788 	rows[k] = k + nc*(slot);
789       }
790       ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
791     }
792   }
793   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
794   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
795   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
796   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
797 
798   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
799   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
800 
801   /*
802     For each node in the grid: we get the neighbors in the local (on processor ordering
803     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
804     PETSc ordering.
805   */
806   if (!da->prealloc_only) {
807     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
808     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
809     for (i=xs; i<xs+nx; i++) {
810 
811       pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
812       pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
813 
814       for (j=ys; j<ys+ny; j++) {
815 	slot = i - gxs + gnx*(j - gys);
816 
817 	lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
818 	lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
819 
820 	cnt  = 0;
821 	for (k=0; k<nc; k++) {
822 	  for (l=lstart; l<lend+1; l++) {
823 	    for (p=pstart; p<pend+1; p++) {
824 	      if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
825 		cols[cnt++]  = k + nc*(slot + gnx*l + p);
826 	      }
827 	    }
828 	  }
829 	  rows[k]      = k + nc*(slot);
830 	}
831 	ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
832       }
833     }
834     ierr = PetscFree(values);CHKERRQ(ierr);
835     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
836     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
837   }
838   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
839   PetscFunctionReturn(0);
840 }
841 
842 #undef __FUNCT__
843 #define __FUNCT__ "DMGetMatrix_DA_2d_MPIAIJ_Fill"
844 PetscErrorCode DMGetMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)
845 {
846   PetscErrorCode         ierr;
847   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
848   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,l,p;
849   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
850   DM_DA                  *dd = (DM_DA*)da->data;
851   PetscInt               ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
852   MPI_Comm               comm;
853   PetscScalar            *values;
854   DMDABoundaryType       bx,by;
855   ISLocalToGlobalMapping ltog,ltogb;
856   DMDAStencilType        st;
857 
858   PetscFunctionBegin;
859   /*
860          nc - number of components per grid point
861          col - number of colors needed in one direction for single component problem
862 
863   */
864   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
865   col = 2*s + 1;
866   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
867   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
868   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
869 
870   ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
871   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
872   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
873 
874   /* determine the matrix preallocation information */
875   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
876   for (i=xs; i<xs+nx; i++) {
877 
878     pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
879     pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
880 
881     for (j=ys; j<ys+ny; j++) {
882       slot = i - gxs + gnx*(j - gys);
883 
884       lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
885       lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
886 
887       for (k=0; k<nc; k++) {
888         cnt  = 0;
889 	for (l=lstart; l<lend+1; l++) {
890 	  for (p=pstart; p<pend+1; p++) {
891             if (l || p) {
892 	      if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
893                 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++)
894 		  cols[cnt++]  = ofill[ifill_col] + nc*(slot + gnx*l + p);
895 	      }
896             } else {
897 	      if (dfill) {
898 		for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++)
899 		  cols[cnt++]  = dfill[ifill_col] + nc*(slot + gnx*l + p);
900 	      } else {
901 		for (ifill_col=0; ifill_col<nc; ifill_col++)
902 		  cols[cnt++]  = ifill_col + nc*(slot + gnx*l + p);
903 	      }
904             }
905 	  }
906 	}
907 	row = k + nc*(slot);
908         ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
909       }
910     }
911   }
912   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
913   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
914   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
915   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
916   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
917 
918   /*
919     For each node in the grid: we get the neighbors in the local (on processor ordering
920     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
921     PETSc ordering.
922   */
923   if (!da->prealloc_only) {
924     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
925     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
926     for (i=xs; i<xs+nx; i++) {
927 
928       pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
929       pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
930 
931       for (j=ys; j<ys+ny; j++) {
932 	slot = i - gxs + gnx*(j - gys);
933 
934 	lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
935 	lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
936 
937 	for (k=0; k<nc; k++) {
938 	  cnt  = 0;
939 	  for (l=lstart; l<lend+1; l++) {
940 	    for (p=pstart; p<pend+1; p++) {
941 	      if (l || p) {
942 		if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
943 		  for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++)
944 		    cols[cnt++]  = ofill[ifill_col] + nc*(slot + gnx*l + p);
945 		}
946 	      } else {
947 		if (dfill) {
948 		  for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++)
949 		    cols[cnt++]  = dfill[ifill_col] + nc*(slot + gnx*l + p);
950 		} else {
951 		  for (ifill_col=0; ifill_col<nc; ifill_col++)
952 		    cols[cnt++]  = ifill_col + nc*(slot + gnx*l + p);
953 		}
954 	      }
955 	    }
956 	  }
957 	  row  = k + nc*(slot);
958 	  ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
959 	}
960       }
961     }
962     ierr = PetscFree(values);CHKERRQ(ierr);
963     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
964     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
965   }
966   ierr = PetscFree(cols);CHKERRQ(ierr);
967   PetscFunctionReturn(0);
968 }
969 
970 /* ---------------------------------------------------------------------------------*/
971 
972 #undef __FUNCT__
973 #define __FUNCT__ "DMGetMatrix_DA_3d_MPIAIJ"
974 PetscErrorCode DMGetMatrix_DA_3d_MPIAIJ(DM da,Mat J)
975 {
976   PetscErrorCode         ierr;
977   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
978   PetscInt               m,n,dim,s,*cols = PETSC_NULL,k,nc,*rows = PETSC_NULL,col,cnt,l,p,*dnz = PETSC_NULL,*onz = PETSC_NULL;
979   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
980   MPI_Comm               comm;
981   PetscScalar            *values;
982   DMDABoundaryType       bx,by,bz;
983   ISLocalToGlobalMapping ltog,ltogb;
984   DMDAStencilType        st;
985 
986   PetscFunctionBegin;
987   /*
988          nc - number of components per grid point
989          col - number of colors needed in one direction for single component problem
990 
991   */
992   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
993   col    = 2*s + 1;
994 
995   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
996   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
997   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
998 
999   ierr = PetscMalloc2(nc,PetscInt,&rows,col*col*col*nc*nc,PetscInt,&cols);CHKERRQ(ierr);
1000   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1001   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1002 
1003   /* determine the matrix preallocation information */
1004   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1005   for (i=xs; i<xs+nx; i++) {
1006     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1007     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1008     for (j=ys; j<ys+ny; j++) {
1009       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1010       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1011       for (k=zs; k<zs+nz; k++) {
1012 	kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1013 	kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1014 
1015 	slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1016 
1017 	cnt  = 0;
1018 	for (l=0; l<nc; l++) {
1019 	  for (ii=istart; ii<iend+1; ii++) {
1020 	    for (jj=jstart; jj<jend+1; jj++) {
1021 	      for (kk=kstart; kk<kend+1; kk++) {
1022 		if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1023 		  cols[cnt++]  = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1024 		}
1025 	      }
1026 	    }
1027 	  }
1028 	  rows[l] = l + nc*(slot);
1029 	}
1030 	ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1031       }
1032     }
1033   }
1034   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1035   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1036   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1037   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1038   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1039   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1040 
1041   /*
1042     For each node in the grid: we get the neighbors in the local (on processor ordering
1043     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1044     PETSc ordering.
1045   */
1046   if (!da->prealloc_only) {
1047     ierr = PetscMalloc(col*col*col*nc*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1048     ierr = PetscMemzero(values,col*col*col*nc*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1049     for (i=xs; i<xs+nx; i++) {
1050       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1051       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1052       for (j=ys; j<ys+ny; j++) {
1053 	jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1054 	jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1055 	for (k=zs; k<zs+nz; k++) {
1056 	  kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1057 	  kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1058 
1059 	  slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1060 
1061 	  cnt  = 0;
1062 	  for (l=0; l<nc; l++) {
1063 	    for (ii=istart; ii<iend+1; ii++) {
1064 	      for (jj=jstart; jj<jend+1; jj++) {
1065 		for (kk=kstart; kk<kend+1; kk++) {
1066 		  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1067 		    cols[cnt++]  = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1068 		  }
1069 		}
1070 	      }
1071 	    }
1072 	    rows[l]      = l + nc*(slot);
1073 	  }
1074 	  ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1075 	}
1076       }
1077     }
1078     ierr = PetscFree(values);CHKERRQ(ierr);
1079     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1080     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1081   }
1082   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1083   PetscFunctionReturn(0);
1084 }
1085 
1086 /* ---------------------------------------------------------------------------------*/
1087 
1088 #undef __FUNCT__
1089 #define __FUNCT__ "DMGetMatrix_DA_1d_MPIAIJ"
1090 PetscErrorCode DMGetMatrix_DA_1d_MPIAIJ(DM da,Mat J)
1091 {
1092   PetscErrorCode         ierr;
1093   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1094   PetscInt               m,dim,s,*cols = PETSC_NULL,nc,*rows = PETSC_NULL,col,cnt,l;
1095   PetscInt               istart,iend;
1096   PetscScalar            *values;
1097   DMDABoundaryType       bx;
1098   ISLocalToGlobalMapping ltog,ltogb;
1099 
1100   PetscFunctionBegin;
1101   /*
1102          nc - number of components per grid point
1103          col - number of colors needed in one direction for single component problem
1104 
1105   */
1106   ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
1107   col    = 2*s + 1;
1108 
1109   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
1110   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
1111 
1112   ierr = MatSeqAIJSetPreallocation(J,col*nc,0);CHKERRQ(ierr);
1113   ierr = MatMPIAIJSetPreallocation(J,col*nc,0,col*nc,0);CHKERRQ(ierr);
1114   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1115   ierr = PetscMalloc2(nc,PetscInt,&rows,col*nc*nc,PetscInt,&cols);CHKERRQ(ierr);
1116 
1117   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1118   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1119   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1120   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1121 
1122   /*
1123     For each node in the grid: we get the neighbors in the local (on processor ordering
1124     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1125     PETSc ordering.
1126   */
1127   if (!da->prealloc_only) {
1128     ierr = PetscMalloc(col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1129     ierr = PetscMemzero(values,col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1130     for (i=xs; i<xs+nx; i++) {
1131       istart = PetscMax(-s,gxs - i);
1132       iend   = PetscMin(s,gxs + gnx - i - 1);
1133       slot   = i - gxs;
1134 
1135       cnt  = 0;
1136       for (l=0; l<nc; l++) {
1137 	for (i1=istart; i1<iend+1; i1++) {
1138 	  cols[cnt++] = l + nc*(slot + i1);
1139 	}
1140 	rows[l]      = l + nc*(slot);
1141       }
1142       ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1143     }
1144     ierr = PetscFree(values);CHKERRQ(ierr);
1145     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1146     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1147   }
1148   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1149   PetscFunctionReturn(0);
1150 }
1151 
1152 #undef __FUNCT__
1153 #define __FUNCT__ "DMGetMatrix_DA_2d_MPIBAIJ"
1154 PetscErrorCode DMGetMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
1155 {
1156   PetscErrorCode         ierr;
1157   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1158   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1159   PetscInt               istart,iend,jstart,jend,ii,jj;
1160   MPI_Comm               comm;
1161   PetscScalar            *values;
1162   DMDABoundaryType       bx,by;
1163   DMDAStencilType        st;
1164   ISLocalToGlobalMapping ltog,ltogb;
1165 
1166   PetscFunctionBegin;
1167   /*
1168      nc - number of components per grid point
1169      col - number of colors needed in one direction for single component problem
1170   */
1171   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1172   col = 2*s + 1;
1173 
1174   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1175   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1176   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1177 
1178   ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1179 
1180   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1181   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1182 
1183   /* determine the matrix preallocation information */
1184   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1185   for (i=xs; i<xs+nx; i++) {
1186     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1187     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1188     for (j=ys; j<ys+ny; j++) {
1189       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1190       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1191       slot = i - gxs + gnx*(j - gys);
1192 
1193       /* Find block columns in block row */
1194       cnt  = 0;
1195       for (ii=istart; ii<iend+1; ii++) {
1196         for (jj=jstart; jj<jend+1; jj++) {
1197           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1198             cols[cnt++]  = slot + ii + gnx*jj;
1199           }
1200         }
1201       }
1202       ierr = MatPreallocateSetLocal(ltogb,1,&slot,ltogb,cnt,cols,dnz,onz);CHKERRQ(ierr);
1203     }
1204   }
1205   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1206   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1207   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1208 
1209   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1210   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1211 
1212   /*
1213     For each node in the grid: we get the neighbors in the local (on processor ordering
1214     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1215     PETSc ordering.
1216   */
1217   if (!da->prealloc_only) {
1218     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1219     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1220     for (i=xs; i<xs+nx; i++) {
1221       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1222       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1223       for (j=ys; j<ys+ny; j++) {
1224 	jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1225 	jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1226 	slot = i - gxs + gnx*(j - gys);
1227 	cnt  = 0;
1228         for (ii=istart; ii<iend+1; ii++) {
1229           for (jj=jstart; jj<jend+1; jj++) {
1230             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1231               cols[cnt++]  = slot + ii + gnx*jj;
1232             }
1233           }
1234         }
1235 	ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1236       }
1237     }
1238     ierr = PetscFree(values);CHKERRQ(ierr);
1239     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1240     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1241   }
1242   ierr = PetscFree(cols);CHKERRQ(ierr);
1243   PetscFunctionReturn(0);
1244 }
1245 
1246 #undef __FUNCT__
1247 #define __FUNCT__ "DMGetMatrix_DA_3d_MPIBAIJ"
1248 PetscErrorCode DMGetMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
1249 {
1250   PetscErrorCode         ierr;
1251   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1252   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1253   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1254   MPI_Comm               comm;
1255   PetscScalar            *values;
1256   DMDABoundaryType       bx,by,bz;
1257   DMDAStencilType        st;
1258   ISLocalToGlobalMapping ltog,ltogb;
1259 
1260   PetscFunctionBegin;
1261   /*
1262          nc - number of components per grid point
1263          col - number of colors needed in one direction for single component problem
1264 
1265   */
1266   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1267   col    = 2*s + 1;
1268 
1269   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1270   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1271   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1272 
1273   ierr  = PetscMalloc(col*col*col*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1274 
1275   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1276   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1277 
1278   /* determine the matrix preallocation information */
1279   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1280   for (i=xs; i<xs+nx; i++) {
1281     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1282     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1283     for (j=ys; j<ys+ny; j++) {
1284       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1285       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1286       for (k=zs; k<zs+nz; k++) {
1287 	kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1288 	kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1289 
1290 	slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1291 
1292 	/* Find block columns in block row */
1293 	cnt  = 0;
1294         for (ii=istart; ii<iend+1; ii++) {
1295           for (jj=jstart; jj<jend+1; jj++) {
1296             for (kk=kstart; kk<kend+1; kk++) {
1297               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1298 		cols[cnt++]  = slot + ii + gnx*jj + gnx*gny*kk;
1299 	      }
1300 	    }
1301 	  }
1302 	}
1303 	ierr = MatPreallocateSetLocal(ltogb,1,&slot,ltogb,cnt,cols,dnz,onz);CHKERRQ(ierr);
1304       }
1305     }
1306   }
1307   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1308   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1309   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1310 
1311   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1312   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1313 
1314   /*
1315     For each node in the grid: we get the neighbors in the local (on processor ordering
1316     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1317     PETSc ordering.
1318   */
1319   if (!da->prealloc_only) {
1320     ierr  = PetscMalloc(col*col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1321     ierr  = PetscMemzero(values,col*col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1322     for (i=xs; i<xs+nx; i++) {
1323       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1324       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1325       for (j=ys; j<ys+ny; j++) {
1326 	jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1327 	jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1328 	for (k=zs; k<zs+nz; k++) {
1329 	  kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1330 	  kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1331 
1332 	  slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1333 
1334 	  cnt  = 0;
1335           for (ii=istart; ii<iend+1; ii++) {
1336             for (jj=jstart; jj<jend+1; jj++) {
1337               for (kk=kstart; kk<kend+1; kk++) {
1338                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1339                   cols[cnt++]  = slot + ii + gnx*jj + gnx*gny*kk;
1340                 }
1341               }
1342             }
1343           }
1344 	  ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1345 	}
1346       }
1347     }
1348     ierr = PetscFree(values);CHKERRQ(ierr);
1349     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1350     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1351   }
1352   ierr = PetscFree(cols);CHKERRQ(ierr);
1353   PetscFunctionReturn(0);
1354 }
1355 
1356 #undef __FUNCT__
1357 #define __FUNCT__ "L2GFilterUpperTriangular"
1358 /*
1359   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
1360   identify in the local ordering with periodic domain.
1361 */
1362 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[])
1363 {
1364   PetscErrorCode ierr;
1365   PetscInt       i,n;
1366 
1367   PetscFunctionBegin;
1368   ierr = ISLocalToGlobalMappingApply(ltog,1,row,row);CHKERRQ(ierr);
1369   ierr = ISLocalToGlobalMappingApply(ltog,*cnt,col,col);CHKERRQ(ierr);
1370   for (i=0,n=0; i<*cnt; i++) {
1371     if (col[i] >= *row) col[n++] = col[i];
1372   }
1373   *cnt = n;
1374   PetscFunctionReturn(0);
1375 }
1376 
1377 #undef __FUNCT__
1378 #define __FUNCT__ "DMGetMatrix_DA_2d_MPISBAIJ"
1379 PetscErrorCode DMGetMatrix_DA_2d_MPISBAIJ(DM da,Mat J)
1380 {
1381   PetscErrorCode         ierr;
1382   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1383   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1384   PetscInt               istart,iend,jstart,jend,ii,jj;
1385   MPI_Comm               comm;
1386   PetscScalar            *values;
1387   DMDABoundaryType       bx,by;
1388   DMDAStencilType        st;
1389   ISLocalToGlobalMapping ltog,ltogb;
1390 
1391   PetscFunctionBegin;
1392   /*
1393      nc - number of components per grid point
1394      col - number of colors needed in one direction for single component problem
1395   */
1396   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1397   col = 2*s + 1;
1398 
1399   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1400   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1401   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1402 
1403   ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1404 
1405   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1406   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1407 
1408   /* determine the matrix preallocation information */
1409   ierr = MatPreallocateSymmetricInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1410   for (i=xs; i<xs+nx; i++) {
1411     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1412     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1413     for (j=ys; j<ys+ny; j++) {
1414       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1415       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1416       slot = i - gxs + gnx*(j - gys);
1417 
1418       /* Find block columns in block row */
1419       cnt  = 0;
1420       for (ii=istart; ii<iend+1; ii++) {
1421         for (jj=jstart; jj<jend+1; jj++) {
1422           if (st == DMDA_STENCIL_BOX || !ii || !jj) {
1423             cols[cnt++]  = slot + ii + gnx*jj;
1424           }
1425         }
1426       }
1427       ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1428       ierr = MatPreallocateSymmetricSet(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
1429     }
1430   }
1431   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1432   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1433   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1434 
1435   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1436   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1437 
1438   /*
1439     For each node in the grid: we get the neighbors in the local (on processor ordering
1440     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1441     PETSc ordering.
1442   */
1443   if (!da->prealloc_only) {
1444     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1445     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1446     for (i=xs; i<xs+nx; i++) {
1447       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1448       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1449       for (j=ys; j<ys+ny; j++) {
1450         jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1451         jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1452         slot = i - gxs + gnx*(j - gys);
1453 
1454         /* Find block columns in block row */
1455         cnt  = 0;
1456         for (ii=istart; ii<iend+1; ii++) {
1457           for (jj=jstart; jj<jend+1; jj++) {
1458             if (st == DMDA_STENCIL_BOX || !ii || !jj) {
1459               cols[cnt++]  = slot + ii + gnx*jj;
1460             }
1461           }
1462         }
1463         ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1464 	ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1465       }
1466     }
1467     ierr = PetscFree(values);CHKERRQ(ierr);
1468     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1469     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1470   }
1471   ierr = PetscFree(cols);CHKERRQ(ierr);
1472   PetscFunctionReturn(0);
1473 }
1474 
1475 #undef __FUNCT__
1476 #define __FUNCT__ "DMGetMatrix_DA_3d_MPISBAIJ"
1477 PetscErrorCode DMGetMatrix_DA_3d_MPISBAIJ(DM da,Mat J)
1478 {
1479   PetscErrorCode         ierr;
1480   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1481   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1482   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1483   MPI_Comm               comm;
1484   PetscScalar            *values;
1485   DMDABoundaryType       bx,by,bz;
1486   DMDAStencilType        st;
1487   ISLocalToGlobalMapping ltog,ltogb;
1488 
1489   PetscFunctionBegin;
1490   /*
1491      nc - number of components per grid point
1492      col - number of colors needed in one direction for single component problem
1493   */
1494   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1495   col = 2*s + 1;
1496 
1497   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1498   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1499   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1500 
1501   /* create the matrix */
1502   ierr = PetscMalloc(col*col*col*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1503 
1504   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1505   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1506 
1507   /* determine the matrix preallocation information */
1508   ierr = MatPreallocateSymmetricInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1509   for (i=xs; i<xs+nx; i++) {
1510     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1511     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1512     for (j=ys; j<ys+ny; j++) {
1513       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1514       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1515       for (k=zs; k<zs+nz; k++) {
1516         kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1517 	kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1518 
1519 	slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1520 
1521 	/* Find block columns in block row */
1522 	cnt  = 0;
1523         for (ii=istart; ii<iend+1; ii++) {
1524           for (jj=jstart; jj<jend+1; jj++) {
1525             for (kk=kstart; kk<kend+1; kk++) {
1526               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
1527                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1528               }
1529             }
1530           }
1531         }
1532         ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1533         ierr = MatPreallocateSymmetricSet(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
1534       }
1535     }
1536   }
1537   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1538   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1539   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1540 
1541   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1542   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1543 
1544   /*
1545     For each node in the grid: we get the neighbors in the local (on processor ordering
1546     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1547     PETSc ordering.
1548   */
1549   if (!da->prealloc_only) {
1550     ierr = PetscMalloc(col*col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1551     ierr = PetscMemzero(values,col*col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1552     for (i=xs; i<xs+nx; i++) {
1553       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1554       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1555       for (j=ys; j<ys+ny; j++) {
1556         jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1557 	jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1558 	for (k=zs; k<zs+nz; k++) {
1559           kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1560 	  kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1561 
1562 	  slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1563 
1564 	  cnt  = 0;
1565           for (ii=istart; ii<iend+1; ii++) {
1566             for (jj=jstart; jj<jend+1; jj++) {
1567               for (kk=kstart; kk<kend+1; kk++) {
1568                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
1569 		  cols[cnt++]  = slot + ii + gnx*jj + gnx*gny*kk;
1570 		}
1571 	      }
1572 	    }
1573 	  }
1574           ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1575           ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1576 	}
1577       }
1578     }
1579     ierr = PetscFree(values);CHKERRQ(ierr);
1580     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1581     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1582   }
1583   ierr = PetscFree(cols);CHKERRQ(ierr);
1584   PetscFunctionReturn(0);
1585 }
1586 
1587 /* ---------------------------------------------------------------------------------*/
1588 
1589 #undef __FUNCT__
1590 #define __FUNCT__ "DMGetMatrix_DA_3d_MPIAIJ_Fill"
1591 PetscErrorCode DMGetMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J)
1592 {
1593   PetscErrorCode         ierr;
1594   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1595   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,l,p,*dnz,*onz;
1596   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1597   DM_DA                  *dd = (DM_DA*)da->data;
1598   PetscInt               ifill_col,*dfill = dd->dfill,*ofill = dd->ofill;
1599   MPI_Comm               comm;
1600   PetscScalar            *values;
1601   DMDABoundaryType       bx,by,bz;
1602   ISLocalToGlobalMapping ltog,ltogb;
1603   DMDAStencilType        st;
1604 
1605   PetscFunctionBegin;
1606   /*
1607          nc - number of components per grid point
1608          col - number of colors needed in one direction for single component problem
1609 
1610   */
1611   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1612   col    = 2*s + 1;
1613   if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)){
1614     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
1615                  by 2*stencil_width + 1\n");
1616   }
1617   if (by == DMDA_BOUNDARY_PERIODIC && (n % col)){
1618     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
1619                  by 2*stencil_width + 1\n");
1620   }
1621   if (bz == DMDA_BOUNDARY_PERIODIC && (p % col)){
1622     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
1623                  by 2*stencil_width + 1\n");
1624   }
1625 
1626   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1627   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1628   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1629 
1630   ierr = PetscMalloc(col*col*col*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1631   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1632   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1633 
1634   /* determine the matrix preallocation information */
1635   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1636 
1637 
1638   for (i=xs; i<xs+nx; i++) {
1639     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1640     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1641     for (j=ys; j<ys+ny; j++) {
1642       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1643       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1644       for (k=zs; k<zs+nz; k++) {
1645         kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1646         kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1647 
1648         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1649 
1650 	for (l=0; l<nc; l++) {
1651 	  cnt  = 0;
1652 	  for (ii=istart; ii<iend+1; ii++) {
1653 	    for (jj=jstart; jj<jend+1; jj++) {
1654 	      for (kk=kstart; kk<kend+1; kk++) {
1655 		if (ii || jj || kk) {
1656 		  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1657 		    for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++)
1658 		      cols[cnt++]  = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1659 		  }
1660 		} else {
1661 		  if (dfill) {
1662 		    for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++)
1663 		      cols[cnt++]  = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1664 		  } else {
1665 		    for (ifill_col=0; ifill_col<nc; ifill_col++)
1666 		      cols[cnt++]  = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1667 		  }
1668 		}
1669 	      }
1670 	    }
1671 	  }
1672 	  row  = l + nc*(slot);
1673 	  ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1674 	}
1675       }
1676     }
1677   }
1678   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1679   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1680   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1681   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1682   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1683 
1684   /*
1685     For each node in the grid: we get the neighbors in the local (on processor ordering
1686     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1687     PETSc ordering.
1688   */
1689   if (!da->prealloc_only) {
1690     ierr = PetscMalloc(col*col*col*nc*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1691     ierr = PetscMemzero(values,col*col*col*nc*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1692     for (i=xs; i<xs+nx; i++) {
1693       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1694       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1695       for (j=ys; j<ys+ny; j++) {
1696 	jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1697 	jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1698 	for (k=zs; k<zs+nz; k++) {
1699 	  kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1700 	  kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1701 
1702 	  slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1703 
1704 	  for (l=0; l<nc; l++) {
1705 	    cnt  = 0;
1706 	    for (ii=istart; ii<iend+1; ii++) {
1707 	      for (jj=jstart; jj<jend+1; jj++) {
1708 		for (kk=kstart; kk<kend+1; kk++) {
1709 		  if (ii || jj || kk) {
1710 		    if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1711 		      for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++)
1712 			cols[cnt++]  = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1713 		    }
1714 		  } else {
1715 		    if (dfill) {
1716 		      for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++)
1717 			cols[cnt++]  = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1718 		    } else {
1719 		      for (ifill_col=0; ifill_col<nc; ifill_col++)
1720 			cols[cnt++]  = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1721 		    }
1722 		  }
1723 		}
1724 	      }
1725 	    }
1726 	    row  = l + nc*(slot);
1727 	    ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1728 	  }
1729 	}
1730       }
1731     }
1732     ierr = PetscFree(values);CHKERRQ(ierr);
1733     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1734     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1735   }
1736   ierr = PetscFree(cols);CHKERRQ(ierr);
1737   PetscFunctionReturn(0);
1738 }
1739