xref: /petsc/src/dm/impls/da/fdda.c (revision 670f3ff9a12f3667ff7d4cebfc50ebc8579c9e33)
1 
2 #include <private/daimpl.h> /*I      "petscdmda.h"     I*/
3 #include <petscmat.h>         /*I      "petscmat.h"    I*/
4 #include <private/matimpl.h>
5 
6 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring *);
7 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring *);
8 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring *);
9 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring *);
10 
11 /*
12    For ghost i that may be negative or greater than the upper bound this
13   maps it into the 0:m-1 range using periodicity
14 */
15 #define SetInRange(i,m) ((i < 0) ? m+i:((i >= m) ? i-m:i))
16 
17 #undef __FUNCT__
18 #define __FUNCT__ "DMDASetBlockFills_Private"
19 static PetscErrorCode DMDASetBlockFills_Private(PetscInt *dfill,PetscInt w,PetscInt **rfill)
20 {
21   PetscErrorCode ierr;
22   PetscInt       i,j,nz,*fill;
23 
24   PetscFunctionBegin;
25   if (!dfill) PetscFunctionReturn(0);
26 
27   /* count number nonzeros */
28   nz = 0;
29   for (i=0; i<w; i++) {
30     for (j=0; j<w; j++) {
31       if (dfill[w*i+j]) nz++;
32     }
33   }
34   ierr = PetscMalloc((nz + w + 1)*sizeof(PetscInt),&fill);CHKERRQ(ierr);
35   /* construct modified CSR storage of nonzero structure */
36   nz = w + 1;
37   for (i=0; i<w; i++) {
38     fill[i] = nz;
39     for (j=0; j<w; j++) {
40       if (dfill[w*i+j]) {
41 	fill[nz] = j;
42 	nz++;
43       }
44     }
45   }
46   fill[w] = nz;
47 
48   *rfill = fill;
49   PetscFunctionReturn(0);
50 }
51 
52 #undef __FUNCT__
53 #define __FUNCT__ "DMDASetBlockFills"
54 /*@
55     DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
56     of the matrix returned by DMCreateMatrix().
57 
58     Logically Collective on DMDA
59 
60     Input Parameter:
61 +   da - the distributed array
62 .   dfill - the fill pattern in the diagonal block (may be PETSC_NULL, means use dense block)
63 -   ofill - the fill pattern in the off-diagonal blocks
64 
65 
66     Level: developer
67 
68     Notes: This only makes sense when you are doing multicomponent problems but using the
69        MPIAIJ matrix format
70 
71            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
72        representing coupling and 0 entries for missing coupling. For example
73 $             dfill[9] = {1, 0, 0,
74 $                         1, 1, 0,
75 $                         0, 1, 1}
76        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
77        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
78        diagonal block).
79 
80      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
81      can be represented in the dfill, ofill format
82 
83    Contributed by Glenn Hammond
84 
85 .seealso DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()
86 
87 @*/
88 PetscErrorCode  DMDASetBlockFills(DM da,PetscInt *dfill,PetscInt *ofill)
89 {
90   DM_DA          *dd = (DM_DA*)da->data;
91   PetscErrorCode ierr;
92 
93   PetscFunctionBegin;
94   ierr = DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);CHKERRQ(ierr);
95   ierr = DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);CHKERRQ(ierr);
96   PetscFunctionReturn(0);
97 }
98 
99 
100 #undef __FUNCT__
101 #define __FUNCT__ "DMCreateColoring_DA"
102 PetscErrorCode  DMCreateColoring_DA(DM da,ISColoringType ctype,const MatType mtype,ISColoring *coloring)
103 {
104   PetscErrorCode   ierr;
105   PetscInt         dim,m,n,p,nc;
106   DMDABoundaryType bx,by,bz;
107   MPI_Comm         comm;
108   PetscMPIInt      size;
109   PetscBool        isBAIJ;
110   DM_DA            *dd = (DM_DA*)da->data;
111 
112   PetscFunctionBegin;
113   /*
114                                   m
115           ------------------------------------------------------
116          |                                                     |
117          |                                                     |
118          |               ----------------------                |
119          |               |                    |                |
120       n  |           yn  |                    |                |
121          |               |                    |                |
122          |               .---------------------                |
123          |             (xs,ys)     xn                          |
124          |            .                                        |
125          |         (gxs,gys)                                   |
126          |                                                     |
127           -----------------------------------------------------
128   */
129 
130   /*
131          nc - number of components per grid point
132          col - number of colors needed in one direction for single component problem
133 
134   */
135   ierr = DMDAGetInfo(da,&dim,0,0,0,&m,&n,&p,&nc,0,&bx,&by,&bz,0);CHKERRQ(ierr);
136 
137   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
138   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
139   if (ctype == IS_COLORING_GHOSTED){
140     if (size == 1) {
141       ctype = IS_COLORING_GLOBAL;
142     } else if (dim > 1){
143       if ((m==1 && bx == DMDA_BOUNDARY_PERIODIC) || (n==1 && by == DMDA_BOUNDARY_PERIODIC) || (p==1 && bz == DMDA_BOUNDARY_PERIODIC)){
144         SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"IS_COLORING_GHOSTED cannot be used for periodic boundary condition having both ends of the domain  on the same process");
145       }
146     }
147   }
148 
149   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
150      matrices is for the blocks, not the individual matrix elements  */
151   ierr = PetscStrcmp(mtype,MATBAIJ,&isBAIJ);CHKERRQ(ierr);
152   if (!isBAIJ) {ierr = PetscStrcmp(mtype,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);}
153   if (!isBAIJ) {ierr = PetscStrcmp(mtype,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);}
154   if (isBAIJ) {
155     dd->w = 1;
156     dd->xs = dd->xs/nc;
157     dd->xe = dd->xe/nc;
158     dd->Xs = dd->Xs/nc;
159     dd->Xe = dd->Xe/nc;
160   }
161 
162   /*
163      We do not provide a getcoloring function in the DMDA operations because
164    the basic DMDA does not know about matrices. We think of DMDA as being more
165    more low-level then matrices.
166   */
167   if (dim == 1) {
168     ierr = DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
169   } else if (dim == 2) {
170     ierr =  DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
171   } else if (dim == 3) {
172     ierr =  DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
173   } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
174   if (isBAIJ) {
175     dd->w = nc;
176     dd->xs = dd->xs*nc;
177     dd->xe = dd->xe*nc;
178     dd->Xs = dd->Xs*nc;
179     dd->Xe = dd->Xe*nc;
180   }
181   PetscFunctionReturn(0);
182 }
183 
184 /* ---------------------------------------------------------------------------------*/
185 
186 #undef __FUNCT__
187 #define __FUNCT__ "DMCreateColoring_DA_2d_MPIAIJ"
188 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
189 {
190   PetscErrorCode         ierr;
191   PetscInt               xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
192   PetscInt               ncolors;
193   MPI_Comm               comm;
194   DMDABoundaryType       bx,by;
195   DMDAStencilType        st;
196   ISColoringValue        *colors;
197   DM_DA                  *dd = (DM_DA*)da->data;
198 
199   PetscFunctionBegin;
200   /*
201          nc - number of components per grid point
202          col - number of colors needed in one direction for single component problem
203 
204   */
205   ierr = DMDAGetInfo(da,&dim,&m,&n,0,&M,&N,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
206   col    = 2*s + 1;
207   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
208   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
209   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
210 
211   /* special case as taught to us by Paul Hovland */
212   if (st == DMDA_STENCIL_STAR && s == 1) {
213     ierr = DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
214   } else {
215 
216     if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)){
217       SETERRQ2(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X (%d) is divisible\n\
218                  by 2*stencil_width + 1 (%d)\n", m, col);
219     }
220     if (by == DMDA_BOUNDARY_PERIODIC && (n % col)){
221       SETERRQ2(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y (%d) is divisible\n\
222                  by 2*stencil_width + 1 (%d)\n", n, col);
223     }
224     if (ctype == IS_COLORING_GLOBAL) {
225       if (!dd->localcoloring) {
226 	ierr = PetscMalloc(nc*nx*ny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
227 	ii = 0;
228 	for (j=ys; j<ys+ny; j++) {
229 	  for (i=xs; i<xs+nx; i++) {
230 	    for (k=0; k<nc; k++) {
231 	      colors[ii++] = k + nc*((i % col) + col*(j % col));
232 	    }
233 	  }
234 	}
235         ncolors = nc + nc*(col-1 + col*(col-1));
236 	ierr = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,&dd->localcoloring);CHKERRQ(ierr);
237       }
238       *coloring = dd->localcoloring;
239     } else if (ctype == IS_COLORING_GHOSTED) {
240       if (!dd->ghostedcoloring) {
241 	ierr = PetscMalloc(nc*gnx*gny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
242 	ii = 0;
243 	for (j=gys; j<gys+gny; j++) {
244 	  for (i=gxs; i<gxs+gnx; i++) {
245 	    for (k=0; k<nc; k++) {
246 	      /* the complicated stuff is to handle periodic boundaries */
247 	      colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
248 	    }
249 	  }
250 	}
251         ncolors = nc + nc*(col - 1 + col*(col-1));
252 	ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
253         /* PetscIntView(ncolors,(PetscInt *)colors,0); */
254 
255 	ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
256       }
257       *coloring = dd->ghostedcoloring;
258     } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
259   }
260   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
261   PetscFunctionReturn(0);
262 }
263 
264 /* ---------------------------------------------------------------------------------*/
265 
266 #undef __FUNCT__
267 #define __FUNCT__ "DMCreateColoring_DA_3d_MPIAIJ"
268 PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
269 {
270   PetscErrorCode    ierr;
271   PetscInt          xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
272   PetscInt          ncolors;
273   MPI_Comm          comm;
274   DMDABoundaryType  bx,by,bz;
275   DMDAStencilType   st;
276   ISColoringValue   *colors;
277   DM_DA             *dd = (DM_DA*)da->data;
278 
279   PetscFunctionBegin;
280   /*
281          nc - number of components per grid point
282          col - number of colors needed in one direction for single component problem
283 
284   */
285   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
286   col    = 2*s + 1;
287   if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)){
288     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
289                  by 2*stencil_width + 1\n");
290   }
291   if (by == DMDA_BOUNDARY_PERIODIC && (n % col)){
292     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
293                  by 2*stencil_width + 1\n");
294   }
295   if (bz == DMDA_BOUNDARY_PERIODIC && (p % col)){
296     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
297                  by 2*stencil_width + 1\n");
298   }
299 
300   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
301   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
302   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
303 
304   /* create the coloring */
305   if (ctype == IS_COLORING_GLOBAL) {
306     if (!dd->localcoloring) {
307       ierr = PetscMalloc(nc*nx*ny*nz*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
308       ii = 0;
309       for (k=zs; k<zs+nz; k++) {
310         for (j=ys; j<ys+ny; j++) {
311           for (i=xs; i<xs+nx; i++) {
312             for (l=0; l<nc; l++) {
313               colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
314             }
315           }
316         }
317       }
318       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
319       ierr = ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,&dd->localcoloring);CHKERRQ(ierr);
320     }
321     *coloring = dd->localcoloring;
322   } else if (ctype == IS_COLORING_GHOSTED) {
323     if (!dd->ghostedcoloring) {
324       ierr = PetscMalloc(nc*gnx*gny*gnz*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
325       ii = 0;
326       for (k=gzs; k<gzs+gnz; k++) {
327         for (j=gys; j<gys+gny; j++) {
328           for (i=gxs; i<gxs+gnx; i++) {
329             for (l=0; l<nc; l++) {
330               /* the complicated stuff is to handle periodic boundaries */
331               colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
332             }
333           }
334         }
335       }
336       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
337       ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
338       ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
339     }
340     *coloring = dd->ghostedcoloring;
341   } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
342   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
343   PetscFunctionReturn(0);
344 }
345 
346 /* ---------------------------------------------------------------------------------*/
347 
348 #undef __FUNCT__
349 #define __FUNCT__ "DMCreateColoring_DA_1d_MPIAIJ"
350 PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
351 {
352   PetscErrorCode    ierr;
353   PetscInt          xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
354   PetscInt          ncolors;
355   MPI_Comm          comm;
356   DMDABoundaryType  bx;
357   ISColoringValue   *colors;
358   DM_DA             *dd = (DM_DA*)da->data;
359 
360   PetscFunctionBegin;
361   /*
362          nc - number of components per grid point
363          col - number of colors needed in one direction for single component problem
364 
365   */
366   ierr = DMDAGetInfo(da,&dim,&m,0,0,&M,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
367   col    = 2*s + 1;
368 
369   if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)) {
370     SETERRQ2(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points %d is divisible\n\
371                  by 2*stencil_width + 1 %d\n",(int)m,(int)col);
372   }
373 
374   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
375   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
376   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
377 
378   /* create the coloring */
379   if (ctype == IS_COLORING_GLOBAL) {
380     if (!dd->localcoloring) {
381       ierr = PetscMalloc(nc*nx*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
382       i1 = 0;
383       for (i=xs; i<xs+nx; i++) {
384         for (l=0; l<nc; l++) {
385           colors[i1++] = l + nc*(i % col);
386         }
387       }
388       ncolors = nc + nc*(col-1);
389       ierr = ISColoringCreate(comm,ncolors,nc*nx,colors,&dd->localcoloring);CHKERRQ(ierr);
390     }
391     *coloring = dd->localcoloring;
392   } else if (ctype == IS_COLORING_GHOSTED) {
393     if (!dd->ghostedcoloring) {
394       ierr = PetscMalloc(nc*gnx*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
395       i1 = 0;
396       for (i=gxs; i<gxs+gnx; i++) {
397         for (l=0; l<nc; l++) {
398           /* the complicated stuff is to handle periodic boundaries */
399           colors[i1++] = l + nc*(SetInRange(i,m) % col);
400         }
401       }
402       ncolors = nc + nc*(col-1);
403       ierr = ISColoringCreate(comm,ncolors,nc*gnx,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
404       ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
405     }
406     *coloring = dd->ghostedcoloring;
407   } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
408   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
409   PetscFunctionReturn(0);
410 }
411 
412 #undef __FUNCT__
413 #define __FUNCT__ "DMCreateColoring_DA_2d_5pt_MPIAIJ"
414 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
415 {
416   PetscErrorCode    ierr;
417   PetscInt          xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
418   PetscInt          ncolors;
419   MPI_Comm          comm;
420   DMDABoundaryType  bx,by;
421   ISColoringValue   *colors;
422   DM_DA             *dd = (DM_DA*)da->data;
423 
424   PetscFunctionBegin;
425   /*
426          nc - number of components per grid point
427          col - number of colors needed in one direction for single component problem
428 
429   */
430   ierr   = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,0);CHKERRQ(ierr);
431   ierr   = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
432   ierr   = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
433   ierr   = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
434 
435   if (bx == DMDA_BOUNDARY_PERIODIC && (m % 5)){
436     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
437                  by 5\n");
438   }
439   if (by == DMDA_BOUNDARY_PERIODIC && (n % 5)){
440     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
441                  by 5\n");
442   }
443 
444   /* create the coloring */
445   if (ctype == IS_COLORING_GLOBAL) {
446     if (!dd->localcoloring) {
447       ierr = PetscMalloc(nc*nx*ny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
448       ii = 0;
449       for (j=ys; j<ys+ny; j++) {
450 	for (i=xs; i<xs+nx; i++) {
451 	  for (k=0; k<nc; k++) {
452 	    colors[ii++] = k + nc*((3*j+i) % 5);
453 	  }
454 	}
455       }
456       ncolors = 5*nc;
457       ierr = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,&dd->localcoloring);CHKERRQ(ierr);
458     }
459     *coloring = dd->localcoloring;
460   } else if (ctype == IS_COLORING_GHOSTED) {
461     if (!dd->ghostedcoloring) {
462       ierr = PetscMalloc(nc*gnx*gny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
463       ii = 0;
464       for (j=gys; j<gys+gny; j++) {
465 	for (i=gxs; i<gxs+gnx; i++) {
466 	  for (k=0; k<nc; k++) {
467 	    colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
468 	  }
469 	}
470       }
471       ncolors = 5*nc;
472       ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
473       ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
474     }
475     *coloring = dd->ghostedcoloring;
476   } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
477   PetscFunctionReturn(0);
478 }
479 
480 /* =========================================================================== */
481 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat);
482 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat);
483 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat);
484 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat);
485 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat);
486 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat);
487 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat);
488 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat);
489 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat);
490 
491 #undef __FUNCT__
492 #define __FUNCT__ "MatSetDM"
493 /*@
494    MatSetDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix
495 
496    Logically Collective on Mat
497 
498    Input Parameters:
499 +  mat - the matrix
500 -  da - the da
501 
502    Level: intermediate
503 
504 @*/
505 PetscErrorCode  MatSetDM(Mat mat,DM da)
506 {
507   PetscErrorCode ierr;
508 
509   PetscFunctionBegin;
510   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
511   PetscValidHeaderSpecific(da,DM_CLASSID,1);
512   ierr = PetscTryMethod(mat,"MatSetDM_C",(Mat,DM),(mat,da));CHKERRQ(ierr);
513   PetscFunctionReturn(0);
514 }
515 
516 EXTERN_C_BEGIN
517 #undef __FUNCT__
518 #define __FUNCT__ "MatView_MPI_DA"
519 PetscErrorCode  MatView_MPI_DA(Mat A,PetscViewer viewer)
520 {
521   DM             da;
522   PetscErrorCode ierr;
523   const char     *prefix;
524   Mat            Anatural;
525   AO             ao;
526   PetscInt       rstart,rend,*petsc,i;
527   IS             is;
528   MPI_Comm       comm;
529   PetscViewerFormat format;
530 
531   PetscFunctionBegin;
532   /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
533   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
534   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0);
535 
536   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
537   ierr = PetscObjectQuery((PetscObject)A,"DM",(PetscObject*)&da);CHKERRQ(ierr);
538   if (!da) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
539 
540   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
541   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
542   ierr = PetscMalloc((rend-rstart)*sizeof(PetscInt),&petsc);CHKERRQ(ierr);
543   for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
544   ierr = AOApplicationToPetsc(ao,rend-rstart,petsc);CHKERRQ(ierr);
545   ierr = ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
546 
547   /* call viewer on natural ordering */
548   ierr = MatGetSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);CHKERRQ(ierr);
549   ierr = ISDestroy(&is);CHKERRQ(ierr);
550   ierr = PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);CHKERRQ(ierr);
551   ierr = PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);CHKERRQ(ierr);
552   ierr = PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);CHKERRQ(ierr);
553   ierr = MatView(Anatural,viewer);CHKERRQ(ierr);
554   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
555   PetscFunctionReturn(0);
556 }
557 EXTERN_C_END
558 
559 EXTERN_C_BEGIN
560 #undef __FUNCT__
561 #define __FUNCT__ "MatLoad_MPI_DA"
562 PetscErrorCode  MatLoad_MPI_DA(Mat A,PetscViewer viewer)
563 {
564   DM             da;
565   PetscErrorCode ierr;
566   Mat            Anatural,Aapp;
567   AO             ao;
568   PetscInt       rstart,rend,*app,i;
569   IS             is;
570   MPI_Comm       comm;
571 
572   PetscFunctionBegin;
573   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
574   ierr = PetscObjectQuery((PetscObject)A,"DM",(PetscObject*)&da);CHKERRQ(ierr);
575   if (!da) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
576 
577   /* Load the matrix in natural ordering */
578   ierr = MatCreate(((PetscObject)A)->comm,&Anatural);CHKERRQ(ierr);
579   ierr = MatSetType(Anatural,((PetscObject)A)->type_name);CHKERRQ(ierr);
580   ierr = MatSetSizes(Anatural,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
581   ierr = MatLoad(Anatural,viewer);CHKERRQ(ierr);
582 
583   /* Map natural ordering to application ordering and create IS */
584   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
585   ierr = MatGetOwnershipRange(Anatural,&rstart,&rend);CHKERRQ(ierr);
586   ierr = PetscMalloc((rend-rstart)*sizeof(PetscInt),&app);CHKERRQ(ierr);
587   for (i=rstart; i<rend; i++) app[i-rstart] = i;
588   ierr = AOPetscToApplication(ao,rend-rstart,app);CHKERRQ(ierr);
589   ierr = ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
590 
591   /* Do permutation and replace header */
592   ierr = MatGetSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);CHKERRQ(ierr);
593   ierr = MatHeaderReplace(A,Aapp);CHKERRQ(ierr);
594   ierr = ISDestroy(&is);CHKERRQ(ierr);
595   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
596   PetscFunctionReturn(0);
597 }
598 EXTERN_C_END
599 
600 #undef __FUNCT__
601 #define __FUNCT__ "DMCreateMatrix_DA"
602 PetscErrorCode DMCreateMatrix_DA(DM da, const MatType mtype,Mat *J)
603 {
604   PetscErrorCode ierr;
605   PetscInt       dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
606   Mat            A;
607   MPI_Comm       comm;
608   const MatType  Atype;
609   void           (*aij)(void)=PETSC_NULL,(*baij)(void)=PETSC_NULL,(*sbaij)(void)=PETSC_NULL;
610   MatType        ttype[256];
611   PetscBool      flg;
612   PetscMPIInt    size;
613   DM_DA          *dd = (DM_DA*)da->data;
614 
615   PetscFunctionBegin;
616 #ifndef PETSC_USE_DYNAMIC_LIBRARIES
617   ierr = MatInitializePackage(PETSC_NULL);CHKERRQ(ierr);
618 #endif
619   if (!mtype) mtype = MATAIJ;
620   ierr = PetscStrcpy((char*)ttype,mtype);CHKERRQ(ierr);
621   ierr = PetscOptionsBegin(((PetscObject)da)->comm,((PetscObject)da)->prefix,"DMDA options","Mat");CHKERRQ(ierr);
622   ierr = PetscOptionsList("-dm_mat_type","Matrix type","MatSetType",MatList,mtype,(char*)ttype,256,&flg);CHKERRQ(ierr);
623   ierr = PetscOptionsEnd();
624 
625   /*
626                                   m
627           ------------------------------------------------------
628          |                                                     |
629          |                                                     |
630          |               ----------------------                |
631          |               |                    |                |
632       n  |           ny  |                    |                |
633          |               |                    |                |
634          |               .---------------------                |
635          |             (xs,ys)     nx                          |
636          |            .                                        |
637          |         (gxs,gys)                                   |
638          |                                                     |
639           -----------------------------------------------------
640   */
641 
642   /*
643          nc - number of components per grid point
644          col - number of colors needed in one direction for single component problem
645 
646   */
647   ierr = DMDAGetInfo(da,&dim,&M,&N,&P,0,0,0,&dof,0,0,0,0,0);CHKERRQ(ierr);
648   ierr = DMDAGetCorners(da,0,0,0,&nx,&ny,&nz);CHKERRQ(ierr);
649   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
650   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
651   ierr = MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);CHKERRQ(ierr);
652   ierr = MatSetType(A,(const MatType)ttype);CHKERRQ(ierr);
653   ierr = MatSetDM(A,da);CHKERRQ(ierr);
654   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
655   ierr = MatGetType(A,&Atype);CHKERRQ(ierr);
656   /*
657      We do not provide a getmatrix function in the DMDA operations because
658    the basic DMDA does not know about matrices. We think of DMDA as being more
659    more low-level than matrices. This is kind of cheating but, cause sometimes
660    we think of DMDA has higher level than matrices.
661 
662      We could switch based on Atype (or mtype), but we do not since the
663    specialized setting routines depend only the particular preallocation
664    details of the matrix, not the type itself.
665   */
666   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
667   if (!aij) {
668     ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
669   }
670   if (!aij) {
671     ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
672     if (!baij) {
673       ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
674     }
675     if (!baij){
676       ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
677       if (!sbaij) {
678         ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
679       }
680     }
681   }
682   if (aij) {
683     if (dim == 1) {
684       ierr = DMCreateMatrix_DA_1d_MPIAIJ(da,A);CHKERRQ(ierr);
685     } else if (dim == 2) {
686       if (dd->ofill) {
687         ierr = DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
688       } else {
689         ierr = DMCreateMatrix_DA_2d_MPIAIJ(da,A);CHKERRQ(ierr);
690       }
691     } else if (dim == 3) {
692       if (dd->ofill) {
693         ierr = DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
694       } else {
695         ierr = DMCreateMatrix_DA_3d_MPIAIJ(da,A);CHKERRQ(ierr);
696       }
697     }
698   } else if (baij) {
699     if (dim == 2) {
700       ierr = DMCreateMatrix_DA_2d_MPIBAIJ(da,A);CHKERRQ(ierr);
701     } else if (dim == 3) {
702       ierr = DMCreateMatrix_DA_3d_MPIBAIJ(da,A);CHKERRQ(ierr);
703     } else {
704       SETERRQ3(((PetscObject)da)->comm,PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension!\n" \
705 	       "Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
706     }
707   } else if (sbaij) {
708     if (dim == 2) {
709       ierr = DMCreateMatrix_DA_2d_MPISBAIJ(da,A);CHKERRQ(ierr);
710     } else if (dim == 3) {
711       ierr = DMCreateMatrix_DA_3d_MPISBAIJ(da,A);CHKERRQ(ierr);
712     } else {
713       SETERRQ3(((PetscObject)da)->comm,PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension!\n" \
714 	       "Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
715     }
716   } else {
717     ISLocalToGlobalMapping ltog,ltogb;
718     ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
719     ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
720     ierr = MatSetUp(A);CHKERRQ(ierr);
721     ierr = MatSetLocalToGlobalMapping(A,ltog,ltog);CHKERRQ(ierr);
722     ierr = MatSetLocalToGlobalMappingBlock(A,ltogb,ltogb);CHKERRQ(ierr);
723   }
724   ierr = DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);CHKERRQ(ierr);
725   ierr = MatSetStencil(A,dim,dims,starts,dof);CHKERRQ(ierr);
726   ierr = PetscObjectCompose((PetscObject)A,"DM",(PetscObject)da);CHKERRQ(ierr);
727   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
728   if (size > 1) {
729     /* change viewer to display matrix in natural ordering */
730     ierr = MatShellSetOperation(A, MATOP_VIEW, (void (*)(void)) MatView_MPI_DA);CHKERRQ(ierr);
731     ierr = MatShellSetOperation(A, MATOP_LOAD, (void (*)(void)) MatLoad_MPI_DA);CHKERRQ(ierr);
732   }
733   *J = A;
734   PetscFunctionReturn(0);
735 }
736 
737 /* ---------------------------------------------------------------------------------*/
738 #undef __FUNCT__
739 #define __FUNCT__ "DMCreateMatrix_DA_2d_MPIAIJ"
740 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J)
741 {
742   PetscErrorCode         ierr;
743   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = PETSC_NULL,k,nc,*rows = PETSC_NULL,col,cnt,l,p;
744   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
745   MPI_Comm               comm;
746   PetscScalar            *values;
747   DMDABoundaryType       bx,by;
748   ISLocalToGlobalMapping ltog,ltogb;
749   DMDAStencilType        st;
750 
751   PetscFunctionBegin;
752   /*
753          nc - number of components per grid point
754          col - number of colors needed in one direction for single component problem
755 
756   */
757   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
758   col = 2*s + 1;
759   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
760   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
761   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
762 
763   ierr = PetscMalloc2(nc,PetscInt,&rows,col*col*nc*nc,PetscInt,&cols);CHKERRQ(ierr);
764   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
765   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
766 
767   /* determine the matrix preallocation information */
768   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
769   for (i=xs; i<xs+nx; i++) {
770 
771     pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
772     pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
773 
774     for (j=ys; j<ys+ny; j++) {
775       slot = i - gxs + gnx*(j - gys);
776 
777       lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
778       lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
779 
780       cnt  = 0;
781       for (k=0; k<nc; k++) {
782 	for (l=lstart; l<lend+1; l++) {
783 	  for (p=pstart; p<pend+1; p++) {
784 	    if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
785 	      cols[cnt++]  = k + nc*(slot + gnx*l + p);
786 	    }
787 	  }
788 	}
789 	rows[k] = k + nc*(slot);
790       }
791       ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
792     }
793   }
794   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
795   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
796   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
797   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
798 
799   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
800   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
801 
802   /*
803     For each node in the grid: we get the neighbors in the local (on processor ordering
804     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
805     PETSc ordering.
806   */
807   if (!da->prealloc_only) {
808     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
809     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
810     for (i=xs; i<xs+nx; i++) {
811 
812       pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
813       pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
814 
815       for (j=ys; j<ys+ny; j++) {
816 	slot = i - gxs + gnx*(j - gys);
817 
818 	lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
819 	lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
820 
821 	cnt  = 0;
822 	for (k=0; k<nc; k++) {
823 	  for (l=lstart; l<lend+1; l++) {
824 	    for (p=pstart; p<pend+1; p++) {
825 	      if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
826 		cols[cnt++]  = k + nc*(slot + gnx*l + p);
827 	      }
828 	    }
829 	  }
830 	  rows[k]      = k + nc*(slot);
831 	}
832 	ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
833       }
834     }
835     ierr = PetscFree(values);CHKERRQ(ierr);
836     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
837     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
838   }
839   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
840   PetscFunctionReturn(0);
841 }
842 
843 #undef __FUNCT__
844 #define __FUNCT__ "DMCreateMatrix_DA_2d_MPIAIJ_Fill"
845 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)
846 {
847   PetscErrorCode         ierr;
848   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
849   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,l,p;
850   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
851   DM_DA                  *dd = (DM_DA*)da->data;
852   PetscInt               ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
853   MPI_Comm               comm;
854   PetscScalar            *values;
855   DMDABoundaryType       bx,by;
856   ISLocalToGlobalMapping ltog,ltogb;
857   DMDAStencilType        st;
858 
859   PetscFunctionBegin;
860   /*
861          nc - number of components per grid point
862          col - number of colors needed in one direction for single component problem
863 
864   */
865   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
866   col = 2*s + 1;
867   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
868   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
869   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
870 
871   ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
872   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
873   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
874 
875   /* determine the matrix preallocation information */
876   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
877   for (i=xs; i<xs+nx; i++) {
878 
879     pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
880     pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
881 
882     for (j=ys; j<ys+ny; j++) {
883       slot = i - gxs + gnx*(j - gys);
884 
885       lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
886       lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
887 
888       for (k=0; k<nc; k++) {
889         cnt  = 0;
890 	for (l=lstart; l<lend+1; l++) {
891 	  for (p=pstart; p<pend+1; p++) {
892             if (l || p) {
893 	      if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
894                 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++)
895 		  cols[cnt++]  = ofill[ifill_col] + nc*(slot + gnx*l + p);
896 	      }
897             } else {
898 	      if (dfill) {
899 		for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++)
900 		  cols[cnt++]  = dfill[ifill_col] + nc*(slot + gnx*l + p);
901 	      } else {
902 		for (ifill_col=0; ifill_col<nc; ifill_col++)
903 		  cols[cnt++]  = ifill_col + nc*(slot + gnx*l + p);
904 	      }
905             }
906 	  }
907 	}
908 	row = k + nc*(slot);
909         ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
910       }
911     }
912   }
913   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
914   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
915   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
916   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
917   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
918 
919   /*
920     For each node in the grid: we get the neighbors in the local (on processor ordering
921     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
922     PETSc ordering.
923   */
924   if (!da->prealloc_only) {
925     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
926     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
927     for (i=xs; i<xs+nx; i++) {
928 
929       pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
930       pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
931 
932       for (j=ys; j<ys+ny; j++) {
933 	slot = i - gxs + gnx*(j - gys);
934 
935 	lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
936 	lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
937 
938 	for (k=0; k<nc; k++) {
939 	  cnt  = 0;
940 	  for (l=lstart; l<lend+1; l++) {
941 	    for (p=pstart; p<pend+1; p++) {
942 	      if (l || p) {
943 		if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
944 		  for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++)
945 		    cols[cnt++]  = ofill[ifill_col] + nc*(slot + gnx*l + p);
946 		}
947 	      } else {
948 		if (dfill) {
949 		  for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++)
950 		    cols[cnt++]  = dfill[ifill_col] + nc*(slot + gnx*l + p);
951 		} else {
952 		  for (ifill_col=0; ifill_col<nc; ifill_col++)
953 		    cols[cnt++]  = ifill_col + nc*(slot + gnx*l + p);
954 		}
955 	      }
956 	    }
957 	  }
958 	  row  = k + nc*(slot);
959 	  ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
960 	}
961       }
962     }
963     ierr = PetscFree(values);CHKERRQ(ierr);
964     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
965     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
966   }
967   ierr = PetscFree(cols);CHKERRQ(ierr);
968   PetscFunctionReturn(0);
969 }
970 
971 /* ---------------------------------------------------------------------------------*/
972 
973 #undef __FUNCT__
974 #define __FUNCT__ "DMCreateMatrix_DA_3d_MPIAIJ"
975 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J)
976 {
977   PetscErrorCode         ierr;
978   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
979   PetscInt               m,n,dim,s,*cols = PETSC_NULL,k,nc,*rows = PETSC_NULL,col,cnt,l,p,*dnz = PETSC_NULL,*onz = PETSC_NULL;
980   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
981   MPI_Comm               comm;
982   PetscScalar            *values;
983   DMDABoundaryType       bx,by,bz;
984   ISLocalToGlobalMapping ltog,ltogb;
985   DMDAStencilType        st;
986 
987   PetscFunctionBegin;
988   /*
989          nc - number of components per grid point
990          col - number of colors needed in one direction for single component problem
991 
992   */
993   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
994   col    = 2*s + 1;
995 
996   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
997   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
998   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
999 
1000   ierr = PetscMalloc2(nc,PetscInt,&rows,col*col*col*nc*nc,PetscInt,&cols);CHKERRQ(ierr);
1001   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1002   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1003 
1004   /* determine the matrix preallocation information */
1005   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1006   for (i=xs; i<xs+nx; i++) {
1007     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1008     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1009     for (j=ys; j<ys+ny; j++) {
1010       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1011       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1012       for (k=zs; k<zs+nz; k++) {
1013 	kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1014 	kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1015 
1016 	slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1017 
1018 	cnt  = 0;
1019 	for (l=0; l<nc; l++) {
1020 	  for (ii=istart; ii<iend+1; ii++) {
1021 	    for (jj=jstart; jj<jend+1; jj++) {
1022 	      for (kk=kstart; kk<kend+1; kk++) {
1023 		if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1024 		  cols[cnt++]  = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1025 		}
1026 	      }
1027 	    }
1028 	  }
1029 	  rows[l] = l + nc*(slot);
1030 	}
1031 	ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1032       }
1033     }
1034   }
1035   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1036   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1037   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1038   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1039   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1040   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1041 
1042   /*
1043     For each node in the grid: we get the neighbors in the local (on processor ordering
1044     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1045     PETSc ordering.
1046   */
1047   if (!da->prealloc_only) {
1048     ierr = PetscMalloc(col*col*col*nc*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1049     ierr = PetscMemzero(values,col*col*col*nc*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1050     for (i=xs; i<xs+nx; i++) {
1051       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1052       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1053       for (j=ys; j<ys+ny; j++) {
1054 	jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1055 	jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1056 	for (k=zs; k<zs+nz; k++) {
1057 	  kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1058 	  kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1059 
1060 	  slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1061 
1062 	  cnt  = 0;
1063 	  for (l=0; l<nc; l++) {
1064 	    for (ii=istart; ii<iend+1; ii++) {
1065 	      for (jj=jstart; jj<jend+1; jj++) {
1066 		for (kk=kstart; kk<kend+1; kk++) {
1067 		  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1068 		    cols[cnt++]  = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1069 		  }
1070 		}
1071 	      }
1072 	    }
1073 	    rows[l]      = l + nc*(slot);
1074 	  }
1075 	  ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1076 	}
1077       }
1078     }
1079     ierr = PetscFree(values);CHKERRQ(ierr);
1080     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1081     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1082   }
1083   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 /* ---------------------------------------------------------------------------------*/
1088 
1089 #undef __FUNCT__
1090 #define __FUNCT__ "DMCreateMatrix_DA_1d_MPIAIJ"
1091 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J)
1092 {
1093   PetscErrorCode         ierr;
1094   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1095   PetscInt               m,dim,s,*cols = PETSC_NULL,nc,*rows = PETSC_NULL,col,cnt,l;
1096   PetscInt               istart,iend;
1097   PetscScalar            *values;
1098   DMDABoundaryType       bx;
1099   ISLocalToGlobalMapping ltog,ltogb;
1100 
1101   PetscFunctionBegin;
1102   /*
1103          nc - number of components per grid point
1104          col - number of colors needed in one direction for single component problem
1105 
1106   */
1107   ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
1108   col    = 2*s + 1;
1109 
1110   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
1111   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
1112 
1113   ierr = MatSeqAIJSetPreallocation(J,col*nc,0);CHKERRQ(ierr);
1114   ierr = MatMPIAIJSetPreallocation(J,col*nc,0,col*nc,0);CHKERRQ(ierr);
1115   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1116   ierr = PetscMalloc2(nc,PetscInt,&rows,col*nc*nc,PetscInt,&cols);CHKERRQ(ierr);
1117 
1118   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1119   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1120   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1121   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1122 
1123   /*
1124     For each node in the grid: we get the neighbors in the local (on processor ordering
1125     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1126     PETSc ordering.
1127   */
1128   if (!da->prealloc_only) {
1129     ierr = PetscMalloc(col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1130     ierr = PetscMemzero(values,col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1131     for (i=xs; i<xs+nx; i++) {
1132       istart = PetscMax(-s,gxs - i);
1133       iend   = PetscMin(s,gxs + gnx - i - 1);
1134       slot   = i - gxs;
1135 
1136       cnt  = 0;
1137       for (l=0; l<nc; l++) {
1138 	for (i1=istart; i1<iend+1; i1++) {
1139 	  cols[cnt++] = l + nc*(slot + i1);
1140 	}
1141 	rows[l]      = l + nc*(slot);
1142       }
1143       ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1144     }
1145     ierr = PetscFree(values);CHKERRQ(ierr);
1146     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1147     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1148   }
1149   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1150   PetscFunctionReturn(0);
1151 }
1152 
1153 #undef __FUNCT__
1154 #define __FUNCT__ "DMCreateMatrix_DA_2d_MPIBAIJ"
1155 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
1156 {
1157   PetscErrorCode         ierr;
1158   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1159   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1160   PetscInt               istart,iend,jstart,jend,ii,jj;
1161   MPI_Comm               comm;
1162   PetscScalar            *values;
1163   DMDABoundaryType       bx,by;
1164   DMDAStencilType        st;
1165   ISLocalToGlobalMapping ltog,ltogb;
1166 
1167   PetscFunctionBegin;
1168   /*
1169      nc - number of components per grid point
1170      col - number of colors needed in one direction for single component problem
1171   */
1172   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1173   col = 2*s + 1;
1174 
1175   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1176   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1177   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1178 
1179   ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1180 
1181   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1182   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1183 
1184   /* determine the matrix preallocation information */
1185   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1186   for (i=xs; i<xs+nx; i++) {
1187     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1188     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1189     for (j=ys; j<ys+ny; j++) {
1190       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1191       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1192       slot = i - gxs + gnx*(j - gys);
1193 
1194       /* Find block columns in block row */
1195       cnt  = 0;
1196       for (ii=istart; ii<iend+1; ii++) {
1197         for (jj=jstart; jj<jend+1; jj++) {
1198           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1199             cols[cnt++]  = slot + ii + gnx*jj;
1200           }
1201         }
1202       }
1203       ierr = MatPreallocateSetLocal(ltogb,1,&slot,ltogb,cnt,cols,dnz,onz);CHKERRQ(ierr);
1204     }
1205   }
1206   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1207   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1208   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1209 
1210   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1211   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1212 
1213   /*
1214     For each node in the grid: we get the neighbors in the local (on processor ordering
1215     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1216     PETSc ordering.
1217   */
1218   if (!da->prealloc_only) {
1219     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1220     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1221     for (i=xs; i<xs+nx; i++) {
1222       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1223       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1224       for (j=ys; j<ys+ny; j++) {
1225 	jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1226 	jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1227 	slot = i - gxs + gnx*(j - gys);
1228 	cnt  = 0;
1229         for (ii=istart; ii<iend+1; ii++) {
1230           for (jj=jstart; jj<jend+1; jj++) {
1231             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1232               cols[cnt++]  = slot + ii + gnx*jj;
1233             }
1234           }
1235         }
1236 	ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1237       }
1238     }
1239     ierr = PetscFree(values);CHKERRQ(ierr);
1240     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1241     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1242   }
1243   ierr = PetscFree(cols);CHKERRQ(ierr);
1244   PetscFunctionReturn(0);
1245 }
1246 
1247 #undef __FUNCT__
1248 #define __FUNCT__ "DMCreateMatrix_DA_3d_MPIBAIJ"
1249 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
1250 {
1251   PetscErrorCode         ierr;
1252   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1253   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1254   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1255   MPI_Comm               comm;
1256   PetscScalar            *values;
1257   DMDABoundaryType       bx,by,bz;
1258   DMDAStencilType        st;
1259   ISLocalToGlobalMapping ltog,ltogb;
1260 
1261   PetscFunctionBegin;
1262   /*
1263          nc - number of components per grid point
1264          col - number of colors needed in one direction for single component problem
1265 
1266   */
1267   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1268   col    = 2*s + 1;
1269 
1270   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1271   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1272   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1273 
1274   ierr  = PetscMalloc(col*col*col*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1275 
1276   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1277   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1278 
1279   /* determine the matrix preallocation information */
1280   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1281   for (i=xs; i<xs+nx; i++) {
1282     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1283     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1284     for (j=ys; j<ys+ny; j++) {
1285       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1286       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1287       for (k=zs; k<zs+nz; k++) {
1288 	kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1289 	kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1290 
1291 	slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1292 
1293 	/* Find block columns in block row */
1294 	cnt  = 0;
1295         for (ii=istart; ii<iend+1; ii++) {
1296           for (jj=jstart; jj<jend+1; jj++) {
1297             for (kk=kstart; kk<kend+1; kk++) {
1298               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1299 		cols[cnt++]  = slot + ii + gnx*jj + gnx*gny*kk;
1300 	      }
1301 	    }
1302 	  }
1303 	}
1304 	ierr = MatPreallocateSetLocal(ltogb,1,&slot,ltogb,cnt,cols,dnz,onz);CHKERRQ(ierr);
1305       }
1306     }
1307   }
1308   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1309   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1310   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1311 
1312   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1313   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1314 
1315   /*
1316     For each node in the grid: we get the neighbors in the local (on processor ordering
1317     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1318     PETSc ordering.
1319   */
1320   if (!da->prealloc_only) {
1321     ierr  = PetscMalloc(col*col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1322     ierr  = PetscMemzero(values,col*col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1323     for (i=xs; i<xs+nx; i++) {
1324       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1325       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1326       for (j=ys; j<ys+ny; j++) {
1327 	jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1328 	jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1329 	for (k=zs; k<zs+nz; k++) {
1330 	  kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1331 	  kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1332 
1333 	  slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1334 
1335 	  cnt  = 0;
1336           for (ii=istart; ii<iend+1; ii++) {
1337             for (jj=jstart; jj<jend+1; jj++) {
1338               for (kk=kstart; kk<kend+1; kk++) {
1339                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1340                   cols[cnt++]  = slot + ii + gnx*jj + gnx*gny*kk;
1341                 }
1342               }
1343             }
1344           }
1345 	  ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1346 	}
1347       }
1348     }
1349     ierr = PetscFree(values);CHKERRQ(ierr);
1350     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1351     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1352   }
1353   ierr = PetscFree(cols);CHKERRQ(ierr);
1354   PetscFunctionReturn(0);
1355 }
1356 
1357 #undef __FUNCT__
1358 #define __FUNCT__ "L2GFilterUpperTriangular"
1359 /*
1360   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
1361   identify in the local ordering with periodic domain.
1362 */
1363 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[])
1364 {
1365   PetscErrorCode ierr;
1366   PetscInt       i,n;
1367 
1368   PetscFunctionBegin;
1369   ierr = ISLocalToGlobalMappingApply(ltog,1,row,row);CHKERRQ(ierr);
1370   ierr = ISLocalToGlobalMappingApply(ltog,*cnt,col,col);CHKERRQ(ierr);
1371   for (i=0,n=0; i<*cnt; i++) {
1372     if (col[i] >= *row) col[n++] = col[i];
1373   }
1374   *cnt = n;
1375   PetscFunctionReturn(0);
1376 }
1377 
1378 #undef __FUNCT__
1379 #define __FUNCT__ "DMCreateMatrix_DA_2d_MPISBAIJ"
1380 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J)
1381 {
1382   PetscErrorCode         ierr;
1383   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1384   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1385   PetscInt               istart,iend,jstart,jend,ii,jj;
1386   MPI_Comm               comm;
1387   PetscScalar            *values;
1388   DMDABoundaryType       bx,by;
1389   DMDAStencilType        st;
1390   ISLocalToGlobalMapping ltog,ltogb;
1391 
1392   PetscFunctionBegin;
1393   /*
1394      nc - number of components per grid point
1395      col - number of colors needed in one direction for single component problem
1396   */
1397   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1398   col = 2*s + 1;
1399 
1400   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1401   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1402   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1403 
1404   ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1405 
1406   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1407   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1408 
1409   /* determine the matrix preallocation information */
1410   ierr = MatPreallocateSymmetricInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1411   for (i=xs; i<xs+nx; i++) {
1412     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1413     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1414     for (j=ys; j<ys+ny; j++) {
1415       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1416       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1417       slot = i - gxs + gnx*(j - gys);
1418 
1419       /* Find block columns in block row */
1420       cnt  = 0;
1421       for (ii=istart; ii<iend+1; ii++) {
1422         for (jj=jstart; jj<jend+1; jj++) {
1423           if (st == DMDA_STENCIL_BOX || !ii || !jj) {
1424             cols[cnt++]  = slot + ii + gnx*jj;
1425           }
1426         }
1427       }
1428       ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1429       ierr = MatPreallocateSymmetricSet(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
1430     }
1431   }
1432   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1433   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1434   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1435 
1436   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1437   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1438 
1439   /*
1440     For each node in the grid: we get the neighbors in the local (on processor ordering
1441     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1442     PETSc ordering.
1443   */
1444   if (!da->prealloc_only) {
1445     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1446     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1447     for (i=xs; i<xs+nx; i++) {
1448       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1449       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1450       for (j=ys; j<ys+ny; j++) {
1451         jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1452         jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1453         slot = i - gxs + gnx*(j - gys);
1454 
1455         /* Find block columns in block row */
1456         cnt  = 0;
1457         for (ii=istart; ii<iend+1; ii++) {
1458           for (jj=jstart; jj<jend+1; jj++) {
1459             if (st == DMDA_STENCIL_BOX || !ii || !jj) {
1460               cols[cnt++]  = slot + ii + gnx*jj;
1461             }
1462           }
1463         }
1464         ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1465 	ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1466       }
1467     }
1468     ierr = PetscFree(values);CHKERRQ(ierr);
1469     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1470     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1471   }
1472   ierr = PetscFree(cols);CHKERRQ(ierr);
1473   PetscFunctionReturn(0);
1474 }
1475 
1476 #undef __FUNCT__
1477 #define __FUNCT__ "DMCreateMatrix_DA_3d_MPISBAIJ"
1478 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J)
1479 {
1480   PetscErrorCode         ierr;
1481   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1482   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1483   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1484   MPI_Comm               comm;
1485   PetscScalar            *values;
1486   DMDABoundaryType       bx,by,bz;
1487   DMDAStencilType        st;
1488   ISLocalToGlobalMapping ltog,ltogb;
1489 
1490   PetscFunctionBegin;
1491   /*
1492      nc - number of components per grid point
1493      col - number of colors needed in one direction for single component problem
1494   */
1495   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1496   col = 2*s + 1;
1497 
1498   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1499   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1500   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1501 
1502   /* create the matrix */
1503   ierr = PetscMalloc(col*col*col*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1504 
1505   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1506   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1507 
1508   /* determine the matrix preallocation information */
1509   ierr = MatPreallocateSymmetricInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1510   for (i=xs; i<xs+nx; i++) {
1511     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1512     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1513     for (j=ys; j<ys+ny; j++) {
1514       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1515       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1516       for (k=zs; k<zs+nz; k++) {
1517         kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1518 	kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1519 
1520 	slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1521 
1522 	/* Find block columns in block row */
1523 	cnt  = 0;
1524         for (ii=istart; ii<iend+1; ii++) {
1525           for (jj=jstart; jj<jend+1; jj++) {
1526             for (kk=kstart; kk<kend+1; kk++) {
1527               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
1528                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1529               }
1530             }
1531           }
1532         }
1533         ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1534         ierr = MatPreallocateSymmetricSet(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
1535       }
1536     }
1537   }
1538   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1539   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1540   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1541 
1542   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1543   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1544 
1545   /*
1546     For each node in the grid: we get the neighbors in the local (on processor ordering
1547     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1548     PETSc ordering.
1549   */
1550   if (!da->prealloc_only) {
1551     ierr = PetscMalloc(col*col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1552     ierr = PetscMemzero(values,col*col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1553     for (i=xs; i<xs+nx; i++) {
1554       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1555       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1556       for (j=ys; j<ys+ny; j++) {
1557         jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1558 	jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1559 	for (k=zs; k<zs+nz; k++) {
1560           kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1561 	  kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1562 
1563 	  slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1564 
1565 	  cnt  = 0;
1566           for (ii=istart; ii<iend+1; ii++) {
1567             for (jj=jstart; jj<jend+1; jj++) {
1568               for (kk=kstart; kk<kend+1; kk++) {
1569                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
1570 		  cols[cnt++]  = slot + ii + gnx*jj + gnx*gny*kk;
1571 		}
1572 	      }
1573 	    }
1574 	  }
1575           ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1576           ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1577 	}
1578       }
1579     }
1580     ierr = PetscFree(values);CHKERRQ(ierr);
1581     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1582     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1583   }
1584   ierr = PetscFree(cols);CHKERRQ(ierr);
1585   PetscFunctionReturn(0);
1586 }
1587 
1588 /* ---------------------------------------------------------------------------------*/
1589 
1590 #undef __FUNCT__
1591 #define __FUNCT__ "DMCreateMatrix_DA_3d_MPIAIJ_Fill"
1592 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J)
1593 {
1594   PetscErrorCode         ierr;
1595   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1596   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,l,p,*dnz,*onz;
1597   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1598   DM_DA                  *dd = (DM_DA*)da->data;
1599   PetscInt               ifill_col,*dfill = dd->dfill,*ofill = dd->ofill;
1600   MPI_Comm               comm;
1601   PetscScalar            *values;
1602   DMDABoundaryType       bx,by,bz;
1603   ISLocalToGlobalMapping ltog,ltogb;
1604   DMDAStencilType        st;
1605 
1606   PetscFunctionBegin;
1607   /*
1608          nc - number of components per grid point
1609          col - number of colors needed in one direction for single component problem
1610 
1611   */
1612   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1613   col    = 2*s + 1;
1614   if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)){
1615     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
1616                  by 2*stencil_width + 1\n");
1617   }
1618   if (by == DMDA_BOUNDARY_PERIODIC && (n % col)){
1619     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
1620                  by 2*stencil_width + 1\n");
1621   }
1622   if (bz == DMDA_BOUNDARY_PERIODIC && (p % col)){
1623     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
1624                  by 2*stencil_width + 1\n");
1625   }
1626 
1627   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1628   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1629   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1630 
1631   ierr = PetscMalloc(col*col*col*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1632   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1633   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1634 
1635   /* determine the matrix preallocation information */
1636   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1637 
1638 
1639   for (i=xs; i<xs+nx; i++) {
1640     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1641     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1642     for (j=ys; j<ys+ny; j++) {
1643       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1644       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1645       for (k=zs; k<zs+nz; k++) {
1646         kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1647         kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1648 
1649         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1650 
1651 	for (l=0; l<nc; l++) {
1652 	  cnt  = 0;
1653 	  for (ii=istart; ii<iend+1; ii++) {
1654 	    for (jj=jstart; jj<jend+1; jj++) {
1655 	      for (kk=kstart; kk<kend+1; kk++) {
1656 		if (ii || jj || kk) {
1657 		  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1658 		    for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++)
1659 		      cols[cnt++]  = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1660 		  }
1661 		} else {
1662 		  if (dfill) {
1663 		    for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++)
1664 		      cols[cnt++]  = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1665 		  } else {
1666 		    for (ifill_col=0; ifill_col<nc; ifill_col++)
1667 		      cols[cnt++]  = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1668 		  }
1669 		}
1670 	      }
1671 	    }
1672 	  }
1673 	  row  = l + nc*(slot);
1674 	  ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1675 	}
1676       }
1677     }
1678   }
1679   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1680   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1681   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1682   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1683   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1684 
1685   /*
1686     For each node in the grid: we get the neighbors in the local (on processor ordering
1687     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1688     PETSc ordering.
1689   */
1690   if (!da->prealloc_only) {
1691     ierr = PetscMalloc(col*col*col*nc*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1692     ierr = PetscMemzero(values,col*col*col*nc*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1693     for (i=xs; i<xs+nx; i++) {
1694       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1695       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1696       for (j=ys; j<ys+ny; j++) {
1697 	jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1698 	jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1699 	for (k=zs; k<zs+nz; k++) {
1700 	  kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1701 	  kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1702 
1703 	  slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1704 
1705 	  for (l=0; l<nc; l++) {
1706 	    cnt  = 0;
1707 	    for (ii=istart; ii<iend+1; ii++) {
1708 	      for (jj=jstart; jj<jend+1; jj++) {
1709 		for (kk=kstart; kk<kend+1; kk++) {
1710 		  if (ii || jj || kk) {
1711 		    if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1712 		      for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++)
1713 			cols[cnt++]  = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1714 		    }
1715 		  } else {
1716 		    if (dfill) {
1717 		      for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++)
1718 			cols[cnt++]  = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1719 		    } else {
1720 		      for (ifill_col=0; ifill_col<nc; ifill_col++)
1721 			cols[cnt++]  = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1722 		    }
1723 		  }
1724 		}
1725 	      }
1726 	    }
1727 	    row  = l + nc*(slot);
1728 	    ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1729 	  }
1730 	}
1731       }
1732     }
1733     ierr = PetscFree(values);CHKERRQ(ierr);
1734     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1735     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1736   }
1737   ierr = PetscFree(cols);CHKERRQ(ierr);
1738   PetscFunctionReturn(0);
1739 }
1740