xref: /petsc/src/dm/impls/da/fdda.c (revision f3e3d7df08ad49fe665df0df50a5f980ce6310e9)
1 
2 #include <private/daimpl.h> /*I      "petscdmda.h"     I*/
3 #include <petscmat.h>         /*I      "petscmat.h"    I*/
4 #include <private/matimpl.h>
5 
6 extern PetscErrorCode DMGetColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring *);
7 extern PetscErrorCode DMGetColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring *);
8 extern PetscErrorCode DMGetColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring *);
9 extern PetscErrorCode DMGetColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring *);
10 
11 /*
12    For ghost i that may be negative or greater than the upper bound this
13   maps it into the 0:m-1 range using periodicity
14 */
15 #define SetInRange(i,m) ((i < 0) ? m+i:((i >= m) ? i-m:i))
16 
17 #undef __FUNCT__
18 #define __FUNCT__ "DMDASetBlockFills_Private"
19 static PetscErrorCode DMDASetBlockFills_Private(PetscInt *dfill,PetscInt w,PetscInt **rfill)
20 {
21   PetscErrorCode ierr;
22   PetscInt       i,j,nz,*fill;
23 
24   PetscFunctionBegin;
25   if (!dfill) PetscFunctionReturn(0);
26 
27   /* count number nonzeros */
28   nz = 0;
29   for (i=0; i<w; i++) {
30     for (j=0; j<w; j++) {
31       if (dfill[w*i+j]) nz++;
32     }
33   }
34   ierr = PetscMalloc((nz + w + 1)*sizeof(PetscInt),&fill);CHKERRQ(ierr);
35   /* construct modified CSR storage of nonzero structure */
36   nz = w + 1;
37   for (i=0; i<w; i++) {
38     fill[i] = nz;
39     for (j=0; j<w; j++) {
40       if (dfill[w*i+j]) {
41 	fill[nz] = j;
42 	nz++;
43       }
44     }
45   }
46   fill[w] = nz;
47 
48   *rfill = fill;
49   PetscFunctionReturn(0);
50 }
51 
52 #undef __FUNCT__
53 #define __FUNCT__ "DMDASetBlockFills"
54 /*@
55     DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
56     of the matrix returned by DMGetMatrix().
57 
58     Logically Collective on DMDA
59 
60     Input Parameter:
61 +   da - the distributed array
62 .   dfill - the fill pattern in the diagonal block (may be PETSC_NULL, means use dense block)
63 -   ofill - the fill pattern in the off-diagonal blocks
64 
65 
66     Level: developer
67 
68     Notes: This only makes sense when you are doing multicomponent problems but using the
69        MPIAIJ matrix format
70 
71            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
72        representing coupling and 0 entries for missing coupling. For example
73 $             dfill[9] = {1, 0, 0,
74 $                         1, 1, 0,
75 $                         0, 1, 1}
76        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
77        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
78        diagonal block).
79 
80      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
81      can be represented in the dfill, ofill format
82 
83    Contributed by Glenn Hammond
84 
85 .seealso DMGetMatrix(), DMDASetGetMatrix(), DMDASetMatPreallocateOnly()
86 
87 @*/
88 PetscErrorCode  DMDASetBlockFills(DM da,PetscInt *dfill,PetscInt *ofill)
89 {
90   DM_DA          *dd = (DM_DA*)da->data;
91   PetscErrorCode ierr;
92 
93   PetscFunctionBegin;
94   ierr = DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);CHKERRQ(ierr);
95   ierr = DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);CHKERRQ(ierr);
96   PetscFunctionReturn(0);
97 }
98 
99 
100 #undef __FUNCT__
101 #define __FUNCT__ "DMGetColoring_DA"
102 PetscErrorCode  DMGetColoring_DA(DM da,ISColoringType ctype,const MatType mtype,ISColoring *coloring)
103 {
104   PetscErrorCode   ierr;
105   PetscInt         dim,m,n,p,nc;
106   DMDABoundaryType bx,by,bz;
107   MPI_Comm         comm;
108   PetscMPIInt      size;
109   PetscBool        isBAIJ;
110   DM_DA            *dd = (DM_DA*)da->data;
111 
112   PetscFunctionBegin;
113   /*
114                                   m
115           ------------------------------------------------------
116          |                                                     |
117          |                                                     |
118          |               ----------------------                |
119          |               |                    |                |
120       n  |           yn  |                    |                |
121          |               |                    |                |
122          |               .---------------------                |
123          |             (xs,ys)     xn                          |
124          |            .                                        |
125          |         (gxs,gys)                                   |
126          |                                                     |
127           -----------------------------------------------------
128   */
129 
130   /*
131          nc - number of components per grid point
132          col - number of colors needed in one direction for single component problem
133 
134   */
135   ierr = DMDAGetInfo(da,&dim,0,0,0,&m,&n,&p,&nc,0,&bx,&by,&bz,0);CHKERRQ(ierr);
136 
137   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
138   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
139   if (ctype == IS_COLORING_GHOSTED){
140     if (size == 1) {
141       ctype = IS_COLORING_GLOBAL;
142     } else if (dim > 1){
143       if ((m==1 && bx == DMDA_BOUNDARY_PERIODIC) || (n==1 && by == DMDA_BOUNDARY_PERIODIC) || (p==1 && bz == DMDA_BOUNDARY_PERIODIC)){
144         SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"IS_COLORING_GHOSTED cannot be used for periodic boundary condition having both ends of the domain  on the same process");
145       }
146     }
147   }
148 
149   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
150      matrices is for the blocks, not the individual matrix elements  */
151   ierr = PetscStrcmp(mtype,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);
152   if (!isBAIJ) {ierr = PetscStrcmp(mtype,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);}
153   if (isBAIJ) {
154     dd->w = 1;
155     dd->xs = dd->xs/nc;
156     dd->xe = dd->xe/nc;
157     dd->Xs = dd->Xs/nc;
158     dd->Xe = dd->Xe/nc;
159   }
160 
161   /*
162      We do not provide a getcoloring function in the DMDA operations because
163    the basic DMDA does not know about matrices. We think of DMDA as being more
164    more low-level then matrices.
165   */
166   if (dim == 1) {
167     ierr = DMGetColoring_DA_1d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
168   } else if (dim == 2) {
169     ierr =  DMGetColoring_DA_2d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
170   } else if (dim == 3) {
171     ierr =  DMGetColoring_DA_3d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
172   } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
173   if (isBAIJ) {
174     dd->w = nc;
175     dd->xs = dd->xs*nc;
176     dd->xe = dd->xe*nc;
177     dd->Xs = dd->Xs*nc;
178     dd->Xe = dd->Xe*nc;
179   }
180   PetscFunctionReturn(0);
181 }
182 
183 /* ---------------------------------------------------------------------------------*/
184 
185 #undef __FUNCT__
186 #define __FUNCT__ "DMGetColoring_DA_2d_MPIAIJ"
187 PetscErrorCode DMGetColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
188 {
189   PetscErrorCode         ierr;
190   PetscInt               xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
191   PetscInt               ncolors;
192   MPI_Comm               comm;
193   DMDABoundaryType       bx,by;
194   DMDAStencilType        st;
195   ISColoringValue        *colors;
196   DM_DA                  *dd = (DM_DA*)da->data;
197 
198   PetscFunctionBegin;
199   /*
200          nc - number of components per grid point
201          col - number of colors needed in one direction for single component problem
202 
203   */
204   ierr = DMDAGetInfo(da,&dim,&m,&n,0,&M,&N,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
205   col    = 2*s + 1;
206   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
207   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
208   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
209 
210   /* special case as taught to us by Paul Hovland */
211   if (st == DMDA_STENCIL_STAR && s == 1) {
212     ierr = DMGetColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
213   } else {
214 
215     if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)){
216       SETERRQ2(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X (%d) is divisible\n\
217                  by 2*stencil_width + 1 (%d)\n", m, col);
218     }
219     if (by == DMDA_BOUNDARY_PERIODIC && (n % col)){
220       SETERRQ2(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y (%d) is divisible\n\
221                  by 2*stencil_width + 1 (%d)\n", n, col);
222     }
223     if (ctype == IS_COLORING_GLOBAL) {
224       if (!dd->localcoloring) {
225 	ierr = PetscMalloc(nc*nx*ny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
226 	ii = 0;
227 	for (j=ys; j<ys+ny; j++) {
228 	  for (i=xs; i<xs+nx; i++) {
229 	    for (k=0; k<nc; k++) {
230 	      colors[ii++] = k + nc*((i % col) + col*(j % col));
231 	    }
232 	  }
233 	}
234         ncolors = nc + nc*(col-1 + col*(col-1));
235 	ierr = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,&dd->localcoloring);CHKERRQ(ierr);
236       }
237       *coloring = dd->localcoloring;
238     } else if (ctype == IS_COLORING_GHOSTED) {
239       if (!dd->ghostedcoloring) {
240 	ierr = PetscMalloc(nc*gnx*gny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
241 	ii = 0;
242 	for (j=gys; j<gys+gny; j++) {
243 	  for (i=gxs; i<gxs+gnx; i++) {
244 	    for (k=0; k<nc; k++) {
245 	      /* the complicated stuff is to handle periodic boundaries */
246 	      colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
247 	    }
248 	  }
249 	}
250         ncolors = nc + nc*(col - 1 + col*(col-1));
251 	ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
252         /* PetscIntView(ncolors,(PetscInt *)colors,0); */
253 
254 	ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
255       }
256       *coloring = dd->ghostedcoloring;
257     } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
258   }
259   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
260   PetscFunctionReturn(0);
261 }
262 
263 /* ---------------------------------------------------------------------------------*/
264 
265 #undef __FUNCT__
266 #define __FUNCT__ "DMGetColoring_DA_3d_MPIAIJ"
267 PetscErrorCode DMGetColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
268 {
269   PetscErrorCode    ierr;
270   PetscInt          xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
271   PetscInt          ncolors;
272   MPI_Comm          comm;
273   DMDABoundaryType  bx,by,bz;
274   DMDAStencilType   st;
275   ISColoringValue   *colors;
276   DM_DA             *dd = (DM_DA*)da->data;
277 
278   PetscFunctionBegin;
279   /*
280          nc - number of components per grid point
281          col - number of colors needed in one direction for single component problem
282 
283   */
284   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
285   col    = 2*s + 1;
286   if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)){
287     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
288                  by 2*stencil_width + 1\n");
289   }
290   if (by == DMDA_BOUNDARY_PERIODIC && (n % col)){
291     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
292                  by 2*stencil_width + 1\n");
293   }
294   if (bz == DMDA_BOUNDARY_PERIODIC && (p % col)){
295     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
296                  by 2*stencil_width + 1\n");
297   }
298 
299   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
300   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
301   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
302 
303   /* create the coloring */
304   if (ctype == IS_COLORING_GLOBAL) {
305     if (!dd->localcoloring) {
306       ierr = PetscMalloc(nc*nx*ny*nz*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
307       ii = 0;
308       for (k=zs; k<zs+nz; k++) {
309         for (j=ys; j<ys+ny; j++) {
310           for (i=xs; i<xs+nx; i++) {
311             for (l=0; l<nc; l++) {
312               colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
313             }
314           }
315         }
316       }
317       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
318       ierr = ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,&dd->localcoloring);CHKERRQ(ierr);
319     }
320     *coloring = dd->localcoloring;
321   } else if (ctype == IS_COLORING_GHOSTED) {
322     if (!dd->ghostedcoloring) {
323       ierr = PetscMalloc(nc*gnx*gny*gnz*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
324       ii = 0;
325       for (k=gzs; k<gzs+gnz; k++) {
326         for (j=gys; j<gys+gny; j++) {
327           for (i=gxs; i<gxs+gnx; i++) {
328             for (l=0; l<nc; l++) {
329               /* the complicated stuff is to handle periodic boundaries */
330               colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
331             }
332           }
333         }
334       }
335       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
336       ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
337       ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
338     }
339     *coloring = dd->ghostedcoloring;
340   } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
341   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
342   PetscFunctionReturn(0);
343 }
344 
345 /* ---------------------------------------------------------------------------------*/
346 
347 #undef __FUNCT__
348 #define __FUNCT__ "DMGetColoring_DA_1d_MPIAIJ"
349 PetscErrorCode DMGetColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
350 {
351   PetscErrorCode    ierr;
352   PetscInt          xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
353   PetscInt          ncolors;
354   MPI_Comm          comm;
355   DMDABoundaryType  bx;
356   ISColoringValue   *colors;
357   DM_DA             *dd = (DM_DA*)da->data;
358 
359   PetscFunctionBegin;
360   /*
361          nc - number of components per grid point
362          col - number of colors needed in one direction for single component problem
363 
364   */
365   ierr = DMDAGetInfo(da,&dim,&m,0,0,&M,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
366   col    = 2*s + 1;
367 
368   if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)) {
369     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points is divisible\n\
370                  by 2*stencil_width + 1\n");
371   }
372 
373   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
374   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
375   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
376 
377   /* create the coloring */
378   if (ctype == IS_COLORING_GLOBAL) {
379     if (!dd->localcoloring) {
380       ierr = PetscMalloc(nc*nx*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
381       i1 = 0;
382       for (i=xs; i<xs+nx; i++) {
383         for (l=0; l<nc; l++) {
384           colors[i1++] = l + nc*(i % col);
385         }
386       }
387       ncolors = nc + nc*(col-1);
388       ierr = ISColoringCreate(comm,ncolors,nc*nx,colors,&dd->localcoloring);CHKERRQ(ierr);
389     }
390     *coloring = dd->localcoloring;
391   } else if (ctype == IS_COLORING_GHOSTED) {
392     if (!dd->ghostedcoloring) {
393       ierr = PetscMalloc(nc*gnx*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
394       i1 = 0;
395       for (i=gxs; i<gxs+gnx; i++) {
396         for (l=0; l<nc; l++) {
397           /* the complicated stuff is to handle periodic boundaries */
398           colors[i1++] = l + nc*(SetInRange(i,m) % col);
399         }
400       }
401       ncolors = nc + nc*(col-1);
402       ierr = ISColoringCreate(comm,ncolors,nc*gnx,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
403       ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
404     }
405     *coloring = dd->ghostedcoloring;
406   } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
407   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
408   PetscFunctionReturn(0);
409 }
410 
411 #undef __FUNCT__
412 #define __FUNCT__ "DMGetColoring_DA_2d_5pt_MPIAIJ"
413 PetscErrorCode DMGetColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
414 {
415   PetscErrorCode    ierr;
416   PetscInt          xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
417   PetscInt          ncolors;
418   MPI_Comm          comm;
419   DMDABoundaryType  bx,by;
420   ISColoringValue   *colors;
421   DM_DA             *dd = (DM_DA*)da->data;
422 
423   PetscFunctionBegin;
424   /*
425          nc - number of components per grid point
426          col - number of colors needed in one direction for single component problem
427 
428   */
429   ierr   = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,0);CHKERRQ(ierr);
430   ierr   = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
431   ierr   = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
432   ierr   = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
433 
434   if (bx == DMDA_BOUNDARY_PERIODIC && (m % 5)){
435     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
436                  by 5\n");
437   }
438   if (by == DMDA_BOUNDARY_PERIODIC && (n % 5)){
439     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
440                  by 5\n");
441   }
442 
443   /* create the coloring */
444   if (ctype == IS_COLORING_GLOBAL) {
445     if (!dd->localcoloring) {
446       ierr = PetscMalloc(nc*nx*ny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
447       ii = 0;
448       for (j=ys; j<ys+ny; j++) {
449 	for (i=xs; i<xs+nx; i++) {
450 	  for (k=0; k<nc; k++) {
451 	    colors[ii++] = k + nc*((3*j+i) % 5);
452 	  }
453 	}
454       }
455       ncolors = 5*nc;
456       ierr = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,&dd->localcoloring);CHKERRQ(ierr);
457     }
458     *coloring = dd->localcoloring;
459   } else if (ctype == IS_COLORING_GHOSTED) {
460     if (!dd->ghostedcoloring) {
461       ierr = PetscMalloc(nc*gnx*gny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
462       ii = 0;
463       for (j=gys; j<gys+gny; j++) {
464 	for (i=gxs; i<gxs+gnx; i++) {
465 	  for (k=0; k<nc; k++) {
466 	    colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
467 	  }
468 	}
469       }
470       ncolors = 5*nc;
471       ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
472       ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
473     }
474     *coloring = dd->ghostedcoloring;
475   } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
476   PetscFunctionReturn(0);
477 }
478 
479 /* =========================================================================== */
480 extern PetscErrorCode DMGetMatrix_DA_1d_MPIAIJ(DM,Mat);
481 extern PetscErrorCode DMGetMatrix_DA_2d_MPIAIJ(DM,Mat);
482 extern PetscErrorCode DMGetMatrix_DA_2d_MPIAIJ_Fill(DM,Mat);
483 extern PetscErrorCode DMGetMatrix_DA_3d_MPIAIJ(DM,Mat);
484 extern PetscErrorCode DMGetMatrix_DA_3d_MPIAIJ_Fill(DM,Mat);
485 extern PetscErrorCode DMGetMatrix_DA_2d_MPIBAIJ(DM,Mat);
486 extern PetscErrorCode DMGetMatrix_DA_3d_MPIBAIJ(DM,Mat);
487 extern PetscErrorCode DMGetMatrix_DA_2d_MPISBAIJ(DM,Mat);
488 extern PetscErrorCode DMGetMatrix_DA_3d_MPISBAIJ(DM,Mat);
489 
490 #undef __FUNCT__
491 #define __FUNCT__ "MatSetDM"
492 /*@
493    MatSetDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix
494 
495    Logically Collective on Mat
496 
497    Input Parameters:
498 +  mat - the matrix
499 -  da - the da
500 
501    Level: intermediate
502 
503 @*/
504 PetscErrorCode  MatSetDM(Mat mat,DM da)
505 {
506   PetscErrorCode ierr;
507 
508   PetscFunctionBegin;
509   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
510   PetscValidHeaderSpecific(da,DM_CLASSID,1);
511   ierr = PetscTryMethod(mat,"MatSetDM_C",(Mat,DM),(mat,da));CHKERRQ(ierr);
512   PetscFunctionReturn(0);
513 }
514 
515 EXTERN_C_BEGIN
516 #undef __FUNCT__
517 #define __FUNCT__ "MatView_MPI_DA"
518 PetscErrorCode  MatView_MPI_DA(Mat A,PetscViewer viewer)
519 {
520   DM             da;
521   PetscErrorCode ierr;
522   const char     *prefix;
523   Mat            Anatural;
524   AO             ao;
525   PetscInt       rstart,rend,*petsc,i;
526   IS             is;
527   MPI_Comm       comm;
528 
529   PetscFunctionBegin;
530   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
531   ierr = PetscObjectQuery((PetscObject)A,"DM",(PetscObject*)&da);CHKERRQ(ierr);
532   if (!da) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
533 
534   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
535   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
536   ierr = PetscMalloc((rend-rstart)*sizeof(PetscInt),&petsc);CHKERRQ(ierr);
537   for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
538   ierr = AOApplicationToPetsc(ao,rend-rstart,petsc);CHKERRQ(ierr);
539   ierr = ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
540 
541   /* call viewer on natural ordering */
542   ierr = MatGetSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);CHKERRQ(ierr);
543   ierr = ISDestroy(&is);CHKERRQ(ierr);
544   ierr = PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);CHKERRQ(ierr);
545   ierr = PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);CHKERRQ(ierr);
546   ierr = PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);CHKERRQ(ierr);
547   ierr = MatView(Anatural,viewer);CHKERRQ(ierr);
548   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
549   PetscFunctionReturn(0);
550 }
551 EXTERN_C_END
552 
553 EXTERN_C_BEGIN
554 #undef __FUNCT__
555 #define __FUNCT__ "MatLoad_MPI_DA"
556 PetscErrorCode  MatLoad_MPI_DA(Mat A,PetscViewer viewer)
557 {
558   DM             da;
559   PetscErrorCode ierr;
560   Mat            Anatural,Aapp;
561   AO             ao;
562   PetscInt       rstart,rend,*app,i;
563   IS             is;
564   MPI_Comm       comm;
565 
566   PetscFunctionBegin;
567   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
568   ierr = PetscObjectQuery((PetscObject)A,"DM",(PetscObject*)&da);CHKERRQ(ierr);
569   if (!da) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
570 
571   /* Load the matrix in natural ordering */
572   ierr = MatCreate(((PetscObject)A)->comm,&Anatural);CHKERRQ(ierr);
573   ierr = MatSetType(Anatural,((PetscObject)A)->type_name);CHKERRQ(ierr);
574   ierr = MatSetSizes(Anatural,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
575   ierr = MatLoad(Anatural,viewer);CHKERRQ(ierr);
576 
577   /* Map natural ordering to application ordering and create IS */
578   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
579   ierr = MatGetOwnershipRange(Anatural,&rstart,&rend);CHKERRQ(ierr);
580   ierr = PetscMalloc((rend-rstart)*sizeof(PetscInt),&app);CHKERRQ(ierr);
581   for (i=rstart; i<rend; i++) app[i-rstart] = i;
582   ierr = AOPetscToApplication(ao,rend-rstart,app);CHKERRQ(ierr);
583   ierr = ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
584 
585   /* Do permutation and replace header */
586   ierr = MatGetSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);CHKERRQ(ierr);
587   ierr = MatHeaderReplace(A,Aapp);CHKERRQ(ierr);
588   ierr = ISDestroy(&is);CHKERRQ(ierr);
589   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
590   PetscFunctionReturn(0);
591 }
592 EXTERN_C_END
593 
594 #undef __FUNCT__
595 #define __FUNCT__ "DMGetMatrix_DA"
596 PetscErrorCode  DMGetMatrix_DA(DM da, const MatType mtype,Mat *J)
597 {
598   PetscErrorCode ierr;
599   PetscInt       dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
600   Mat            A;
601   MPI_Comm       comm;
602   const MatType  Atype;
603   void           (*aij)(void)=PETSC_NULL,(*baij)(void)=PETSC_NULL,(*sbaij)(void)=PETSC_NULL;
604   MatType        ttype[256];
605   PetscBool      flg;
606   PetscMPIInt    size;
607   DM_DA          *dd = (DM_DA*)da->data;
608 
609   PetscFunctionBegin;
610 #ifndef PETSC_USE_DYNAMIC_LIBRARIES
611   ierr = MatInitializePackage(PETSC_NULL);CHKERRQ(ierr);
612 #endif
613   if (!mtype) mtype = MATAIJ;
614   ierr = PetscStrcpy((char*)ttype,mtype);CHKERRQ(ierr);
615   ierr = PetscOptionsBegin(((PetscObject)da)->comm,((PetscObject)da)->prefix,"DMDA options","Mat");CHKERRQ(ierr);
616   ierr = PetscOptionsList("-da_mat_type","Matrix type","MatSetType",MatList,mtype,(char*)ttype,256,&flg);CHKERRQ(ierr);
617   ierr = PetscOptionsEnd();
618 
619   /*
620                                   m
621           ------------------------------------------------------
622          |                                                     |
623          |                                                     |
624          |               ----------------------                |
625          |               |                    |                |
626       n  |           ny  |                    |                |
627          |               |                    |                |
628          |               .---------------------                |
629          |             (xs,ys)     nx                          |
630          |            .                                        |
631          |         (gxs,gys)                                   |
632          |                                                     |
633           -----------------------------------------------------
634   */
635 
636   /*
637          nc - number of components per grid point
638          col - number of colors needed in one direction for single component problem
639 
640   */
641   ierr = DMDAGetInfo(da,&dim,&M,&N,&P,0,0,0,&dof,0,0,0,0,0);CHKERRQ(ierr);
642   ierr = DMDAGetCorners(da,0,0,0,&nx,&ny,&nz);CHKERRQ(ierr);
643   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
644   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
645   ierr = MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);CHKERRQ(ierr);
646   ierr = MatSetType(A,(const MatType)ttype);CHKERRQ(ierr);
647   ierr = MatSetDM(A,da);CHKERRQ(ierr);
648   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
649   ierr = MatGetType(A,&Atype);CHKERRQ(ierr);
650   /*
651      We do not provide a getmatrix function in the DMDA operations because
652    the basic DMDA does not know about matrices. We think of DMDA as being more
653    more low-level than matrices. This is kind of cheating but, cause sometimes
654    we think of DMDA has higher level than matrices.
655 
656      We could switch based on Atype (or mtype), but we do not since the
657    specialized setting routines depend only the particular preallocation
658    details of the matrix, not the type itself.
659   */
660   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
661   if (!aij) {
662     ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
663   }
664   if (!aij) {
665     ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
666     if (!baij) {
667       ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
668     }
669     if (!baij){
670       ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
671       if (!sbaij) {
672         ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
673       }
674     }
675   }
676   if (aij) {
677     if (dim == 1) {
678       ierr = DMGetMatrix_DA_1d_MPIAIJ(da,A);CHKERRQ(ierr);
679     } else if (dim == 2) {
680       if (dd->ofill) {
681         ierr = DMGetMatrix_DA_2d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
682       } else {
683         ierr = DMGetMatrix_DA_2d_MPIAIJ(da,A);CHKERRQ(ierr);
684       }
685     } else if (dim == 3) {
686       if (dd->ofill) {
687         ierr = DMGetMatrix_DA_3d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
688       } else {
689         ierr = DMGetMatrix_DA_3d_MPIAIJ(da,A);CHKERRQ(ierr);
690       }
691     }
692   } else if (baij) {
693     if (dim == 2) {
694       ierr = DMGetMatrix_DA_2d_MPIBAIJ(da,A);CHKERRQ(ierr);
695     } else if (dim == 3) {
696       ierr = DMGetMatrix_DA_3d_MPIBAIJ(da,A);CHKERRQ(ierr);
697     } else {
698       SETERRQ3(((PetscObject)da)->comm,PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension!\n" \
699 	       "Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
700     }
701   } else if (sbaij) {
702     if (dim == 2) {
703       ierr = DMGetMatrix_DA_2d_MPISBAIJ(da,A);CHKERRQ(ierr);
704     } else if (dim == 3) {
705       ierr = DMGetMatrix_DA_3d_MPISBAIJ(da,A);CHKERRQ(ierr);
706     } else {
707       SETERRQ3(((PetscObject)da)->comm,PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension!\n" \
708 	       "Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
709     }
710   }
711   ierr = DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);CHKERRQ(ierr);
712   ierr = MatSetStencil(A,dim,dims,starts,dof);CHKERRQ(ierr);
713   ierr = PetscObjectCompose((PetscObject)A,"DM",(PetscObject)da);CHKERRQ(ierr);
714   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
715   if (size > 1) {
716     /* change viewer to display matrix in natural ordering */
717     ierr = MatShellSetOperation(A, MATOP_VIEW, (void (*)(void)) MatView_MPI_DA);CHKERRQ(ierr);
718     ierr = MatShellSetOperation(A, MATOP_LOAD, (void (*)(void)) MatLoad_MPI_DA);CHKERRQ(ierr);
719   }
720   *J = A;
721   PetscFunctionReturn(0);
722 }
723 
724 /* ---------------------------------------------------------------------------------*/
725 #undef __FUNCT__
726 #define __FUNCT__ "DMGetMatrix_DA_2d_MPIAIJ"
727 PetscErrorCode DMGetMatrix_DA_2d_MPIAIJ(DM da,Mat J)
728 {
729   PetscErrorCode         ierr;
730   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = PETSC_NULL,k,nc,*rows = PETSC_NULL,col,cnt,l,p;
731   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
732   MPI_Comm               comm;
733   PetscScalar            *values;
734   DMDABoundaryType       bx,by;
735   ISLocalToGlobalMapping ltog,ltogb;
736   DMDAStencilType        st;
737 
738   PetscFunctionBegin;
739   /*
740          nc - number of components per grid point
741          col - number of colors needed in one direction for single component problem
742 
743   */
744   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
745   col = 2*s + 1;
746   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
747   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
748   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
749 
750   ierr = PetscMalloc2(nc,PetscInt,&rows,col*col*nc*nc,PetscInt,&cols);CHKERRQ(ierr);
751   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
752   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
753 
754   /* determine the matrix preallocation information */
755   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
756   for (i=xs; i<xs+nx; i++) {
757 
758     pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
759     pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
760 
761     for (j=ys; j<ys+ny; j++) {
762       slot = i - gxs + gnx*(j - gys);
763 
764       lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
765       lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
766 
767       cnt  = 0;
768       for (k=0; k<nc; k++) {
769 	for (l=lstart; l<lend+1; l++) {
770 	  for (p=pstart; p<pend+1; p++) {
771 	    if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
772 	      cols[cnt++]  = k + nc*(slot + gnx*l + p);
773 	    }
774 	  }
775 	}
776 	rows[k] = k + nc*(slot);
777       }
778       ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
779     }
780   }
781   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
782   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
783   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
784   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
785 
786   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
787   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
788 
789   /*
790     For each node in the grid: we get the neighbors in the local (on processor ordering
791     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
792     PETSc ordering.
793   */
794   if (!da->prealloc_only) {
795     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
796     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
797     for (i=xs; i<xs+nx; i++) {
798 
799       pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
800       pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
801 
802       for (j=ys; j<ys+ny; j++) {
803 	slot = i - gxs + gnx*(j - gys);
804 
805 	lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
806 	lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
807 
808 	cnt  = 0;
809 	for (k=0; k<nc; k++) {
810 	  for (l=lstart; l<lend+1; l++) {
811 	    for (p=pstart; p<pend+1; p++) {
812 	      if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
813 		cols[cnt++]  = k + nc*(slot + gnx*l + p);
814 	      }
815 	    }
816 	  }
817 	  rows[k]      = k + nc*(slot);
818 	}
819 	ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
820       }
821     }
822     ierr = PetscFree(values);CHKERRQ(ierr);
823     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
824     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
825   }
826   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
827   PetscFunctionReturn(0);
828 }
829 
830 #undef __FUNCT__
831 #define __FUNCT__ "DMGetMatrix_DA_2d_MPIAIJ_Fill"
832 PetscErrorCode DMGetMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)
833 {
834   PetscErrorCode         ierr;
835   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
836   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,l,p;
837   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
838   DM_DA                  *dd = (DM_DA*)da->data;
839   PetscInt               ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
840   MPI_Comm               comm;
841   PetscScalar            *values;
842   DMDABoundaryType       bx,by;
843   ISLocalToGlobalMapping ltog,ltogb;
844   DMDAStencilType        st;
845 
846   PetscFunctionBegin;
847   /*
848          nc - number of components per grid point
849          col - number of colors needed in one direction for single component problem
850 
851   */
852   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
853   col = 2*s + 1;
854   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
855   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
856   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
857 
858   ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
859   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
860   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
861 
862   /* determine the matrix preallocation information */
863   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
864   for (i=xs; i<xs+nx; i++) {
865 
866     pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
867     pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
868 
869     for (j=ys; j<ys+ny; j++) {
870       slot = i - gxs + gnx*(j - gys);
871 
872       lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
873       lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
874 
875       for (k=0; k<nc; k++) {
876         cnt  = 0;
877 	for (l=lstart; l<lend+1; l++) {
878 	  for (p=pstart; p<pend+1; p++) {
879             if (l || p) {
880 	      if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
881                 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++)
882 		  cols[cnt++]  = ofill[ifill_col] + nc*(slot + gnx*l + p);
883 	      }
884             } else {
885 	      if (dfill) {
886 		for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++)
887 		  cols[cnt++]  = dfill[ifill_col] + nc*(slot + gnx*l + p);
888 	      } else {
889 		for (ifill_col=0; ifill_col<nc; ifill_col++)
890 		  cols[cnt++]  = ifill_col + nc*(slot + gnx*l + p);
891 	      }
892             }
893 	  }
894 	}
895 	row = k + nc*(slot);
896         ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
897       }
898     }
899   }
900   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
901   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
902   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
903   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
904   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
905 
906   /*
907     For each node in the grid: we get the neighbors in the local (on processor ordering
908     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
909     PETSc ordering.
910   */
911   if (!da->prealloc_only) {
912     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
913     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
914     for (i=xs; i<xs+nx; i++) {
915 
916       pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
917       pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
918 
919       for (j=ys; j<ys+ny; j++) {
920 	slot = i - gxs + gnx*(j - gys);
921 
922 	lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
923 	lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
924 
925 	for (k=0; k<nc; k++) {
926 	  cnt  = 0;
927 	  for (l=lstart; l<lend+1; l++) {
928 	    for (p=pstart; p<pend+1; p++) {
929 	      if (l || p) {
930 		if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
931 		  for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++)
932 		    cols[cnt++]  = ofill[ifill_col] + nc*(slot + gnx*l + p);
933 		}
934 	      } else {
935 		if (dfill) {
936 		  for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++)
937 		    cols[cnt++]  = dfill[ifill_col] + nc*(slot + gnx*l + p);
938 		} else {
939 		  for (ifill_col=0; ifill_col<nc; ifill_col++)
940 		    cols[cnt++]  = ifill_col + nc*(slot + gnx*l + p);
941 		}
942 	      }
943 	    }
944 	  }
945 	  row  = k + nc*(slot);
946 	  ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
947 	}
948       }
949     }
950     ierr = PetscFree(values);CHKERRQ(ierr);
951     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
952     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
953   }
954   ierr = PetscFree(cols);CHKERRQ(ierr);
955   PetscFunctionReturn(0);
956 }
957 
958 /* ---------------------------------------------------------------------------------*/
959 
960 #undef __FUNCT__
961 #define __FUNCT__ "DMGetMatrix_DA_3d_MPIAIJ"
962 PetscErrorCode DMGetMatrix_DA_3d_MPIAIJ(DM da,Mat J)
963 {
964   PetscErrorCode         ierr;
965   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
966   PetscInt               m,n,dim,s,*cols = PETSC_NULL,k,nc,*rows = PETSC_NULL,col,cnt,l,p,*dnz = PETSC_NULL,*onz = PETSC_NULL;
967   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
968   MPI_Comm               comm;
969   PetscScalar            *values;
970   DMDABoundaryType       bx,by,bz;
971   ISLocalToGlobalMapping ltog,ltogb;
972   DMDAStencilType        st;
973 
974   PetscFunctionBegin;
975   /*
976          nc - number of components per grid point
977          col - number of colors needed in one direction for single component problem
978 
979   */
980   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
981   col    = 2*s + 1;
982 
983   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
984   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
985   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
986 
987   ierr = PetscMalloc2(nc,PetscInt,&rows,col*col*col*nc*nc,PetscInt,&cols);CHKERRQ(ierr);
988   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
989   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
990 
991   /* determine the matrix preallocation information */
992   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
993   for (i=xs; i<xs+nx; i++) {
994     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
995     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
996     for (j=ys; j<ys+ny; j++) {
997       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
998       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
999       for (k=zs; k<zs+nz; k++) {
1000 	kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1001 	kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1002 
1003 	slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1004 
1005 	cnt  = 0;
1006 	for (l=0; l<nc; l++) {
1007 	  for (ii=istart; ii<iend+1; ii++) {
1008 	    for (jj=jstart; jj<jend+1; jj++) {
1009 	      for (kk=kstart; kk<kend+1; kk++) {
1010 		if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1011 		  cols[cnt++]  = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1012 		}
1013 	      }
1014 	    }
1015 	  }
1016 	  rows[l] = l + nc*(slot);
1017 	}
1018 	ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1019       }
1020     }
1021   }
1022   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1023   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1024   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1025   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1026   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1027   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1028 
1029   /*
1030     For each node in the grid: we get the neighbors in the local (on processor ordering
1031     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1032     PETSc ordering.
1033   */
1034   if (!da->prealloc_only) {
1035     ierr = PetscMalloc(col*col*col*nc*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1036     ierr = PetscMemzero(values,col*col*col*nc*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1037     for (i=xs; i<xs+nx; i++) {
1038       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1039       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1040       for (j=ys; j<ys+ny; j++) {
1041 	jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1042 	jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1043 	for (k=zs; k<zs+nz; k++) {
1044 	  kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1045 	  kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1046 
1047 	  slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1048 
1049 	  cnt  = 0;
1050 	  for (l=0; l<nc; l++) {
1051 	    for (ii=istart; ii<iend+1; ii++) {
1052 	      for (jj=jstart; jj<jend+1; jj++) {
1053 		for (kk=kstart; kk<kend+1; kk++) {
1054 		  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1055 		    cols[cnt++]  = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1056 		  }
1057 		}
1058 	      }
1059 	    }
1060 	    rows[l]      = l + nc*(slot);
1061 	  }
1062 	  ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1063 	}
1064       }
1065     }
1066     ierr = PetscFree(values);CHKERRQ(ierr);
1067     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1068     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1069   }
1070   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1071   PetscFunctionReturn(0);
1072 }
1073 
1074 /* ---------------------------------------------------------------------------------*/
1075 
1076 #undef __FUNCT__
1077 #define __FUNCT__ "DMGetMatrix_DA_1d_MPIAIJ"
1078 PetscErrorCode DMGetMatrix_DA_1d_MPIAIJ(DM da,Mat J)
1079 {
1080   PetscErrorCode         ierr;
1081   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1082   PetscInt               m,dim,s,*cols = PETSC_NULL,nc,*rows = PETSC_NULL,col,cnt,l;
1083   PetscInt               istart,iend;
1084   PetscScalar            *values;
1085   DMDABoundaryType       bx;
1086   ISLocalToGlobalMapping ltog,ltogb;
1087 
1088   PetscFunctionBegin;
1089   /*
1090          nc - number of components per grid point
1091          col - number of colors needed in one direction for single component problem
1092 
1093   */
1094   ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
1095   col    = 2*s + 1;
1096 
1097   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
1098   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
1099 
1100   ierr = MatSeqAIJSetPreallocation(J,col*nc,0);CHKERRQ(ierr);
1101   ierr = MatMPIAIJSetPreallocation(J,col*nc,0,col*nc,0);CHKERRQ(ierr);
1102   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1103   ierr = PetscMalloc2(nc,PetscInt,&rows,col*nc*nc,PetscInt,&cols);CHKERRQ(ierr);
1104 
1105   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1106   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1107   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1108   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1109 
1110   /*
1111     For each node in the grid: we get the neighbors in the local (on processor ordering
1112     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1113     PETSc ordering.
1114   */
1115   if (!da->prealloc_only) {
1116     ierr = PetscMalloc(col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1117     ierr = PetscMemzero(values,col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1118     for (i=xs; i<xs+nx; i++) {
1119       istart = PetscMax(-s,gxs - i);
1120       iend   = PetscMin(s,gxs + gnx - i - 1);
1121       slot   = i - gxs;
1122 
1123       cnt  = 0;
1124       for (l=0; l<nc; l++) {
1125 	for (i1=istart; i1<iend+1; i1++) {
1126 	  cols[cnt++] = l + nc*(slot + i1);
1127 	}
1128 	rows[l]      = l + nc*(slot);
1129       }
1130       ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1131     }
1132     ierr = PetscFree(values);CHKERRQ(ierr);
1133     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1134     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1135   }
1136   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1137   PetscFunctionReturn(0);
1138 }
1139 
1140 #undef __FUNCT__
1141 #define __FUNCT__ "DMGetMatrix_DA_2d_MPIBAIJ"
1142 PetscErrorCode DMGetMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
1143 {
1144   PetscErrorCode         ierr;
1145   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1146   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1147   PetscInt               istart,iend,jstart,jend,ii,jj;
1148   MPI_Comm               comm;
1149   PetscScalar            *values;
1150   DMDABoundaryType       bx,by;
1151   DMDAStencilType        st;
1152   ISLocalToGlobalMapping ltog,ltogb;
1153 
1154   PetscFunctionBegin;
1155   /*
1156      nc - number of components per grid point
1157      col - number of colors needed in one direction for single component problem
1158   */
1159   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1160   col = 2*s + 1;
1161 
1162   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1163   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1164   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1165 
1166   ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1167 
1168   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1169   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1170 
1171   /* determine the matrix preallocation information */
1172   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1173   for (i=xs; i<xs+nx; i++) {
1174     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1175     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1176     for (j=ys; j<ys+ny; j++) {
1177       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1178       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1179       slot = i - gxs + gnx*(j - gys);
1180 
1181       /* Find block columns in block row */
1182       cnt  = 0;
1183       for (ii=istart; ii<iend+1; ii++) {
1184         for (jj=jstart; jj<jend+1; jj++) {
1185           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1186             cols[cnt++]  = slot + ii + gnx*jj;
1187           }
1188         }
1189       }
1190       ierr = MatPreallocateSetLocal(ltogb,1,&slot,ltogb,cnt,cols,dnz,onz);CHKERRQ(ierr);
1191     }
1192   }
1193   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1194   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1195   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1196 
1197   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1198   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1199 
1200   /*
1201     For each node in the grid: we get the neighbors in the local (on processor ordering
1202     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1203     PETSc ordering.
1204   */
1205   if (!da->prealloc_only) {
1206     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1207     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1208     for (i=xs; i<xs+nx; i++) {
1209       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1210       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1211       for (j=ys; j<ys+ny; j++) {
1212 	jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1213 	jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1214 	slot = i - gxs + gnx*(j - gys);
1215 	cnt  = 0;
1216         for (ii=istart; ii<iend+1; ii++) {
1217           for (jj=jstart; jj<jend+1; jj++) {
1218             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1219               cols[cnt++]  = slot + ii + gnx*jj;
1220             }
1221           }
1222         }
1223 	ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1224       }
1225     }
1226     ierr = PetscFree(values);CHKERRQ(ierr);
1227     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1228     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1229   }
1230   ierr = PetscFree(cols);CHKERRQ(ierr);
1231   PetscFunctionReturn(0);
1232 }
1233 
1234 #undef __FUNCT__
1235 #define __FUNCT__ "DMGetMatrix_DA_3d_MPIBAIJ"
1236 PetscErrorCode DMGetMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
1237 {
1238   PetscErrorCode         ierr;
1239   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1240   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1241   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1242   MPI_Comm               comm;
1243   PetscScalar            *values;
1244   DMDABoundaryType       bx,by,bz;
1245   DMDAStencilType        st;
1246   ISLocalToGlobalMapping ltog,ltogb;
1247 
1248   PetscFunctionBegin;
1249   /*
1250          nc - number of components per grid point
1251          col - number of colors needed in one direction for single component problem
1252 
1253   */
1254   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1255   col    = 2*s + 1;
1256 
1257   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1258   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1259   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1260 
1261   ierr  = PetscMalloc(col*col*col*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1262 
1263   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1264   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1265 
1266   /* determine the matrix preallocation information */
1267   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1268   for (i=xs; i<xs+nx; i++) {
1269     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1270     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1271     for (j=ys; j<ys+ny; j++) {
1272       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1273       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1274       for (k=zs; k<zs+nz; k++) {
1275 	kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1276 	kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1277 
1278 	slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1279 
1280 	/* Find block columns in block row */
1281 	cnt  = 0;
1282         for (ii=istart; ii<iend+1; ii++) {
1283           for (jj=jstart; jj<jend+1; jj++) {
1284             for (kk=kstart; kk<kend+1; kk++) {
1285               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1286 		cols[cnt++]  = slot + ii + gnx*jj + gnx*gny*kk;
1287 	      }
1288 	    }
1289 	  }
1290 	}
1291 	ierr = MatPreallocateSetLocal(ltogb,1,&slot,ltogb,cnt,cols,dnz,onz);CHKERRQ(ierr);
1292       }
1293     }
1294   }
1295   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1296   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1297   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1298 
1299   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1300   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1301 
1302   /*
1303     For each node in the grid: we get the neighbors in the local (on processor ordering
1304     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1305     PETSc ordering.
1306   */
1307   if (!da->prealloc_only) {
1308     ierr  = PetscMalloc(col*col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1309     ierr  = PetscMemzero(values,col*col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1310     for (i=xs; i<xs+nx; i++) {
1311       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1312       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1313       for (j=ys; j<ys+ny; j++) {
1314 	jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1315 	jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1316 	for (k=zs; k<zs+nz; k++) {
1317 	  kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1318 	  kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1319 
1320 	  slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1321 
1322 	  cnt  = 0;
1323           for (ii=istart; ii<iend+1; ii++) {
1324             for (jj=jstart; jj<jend+1; jj++) {
1325               for (kk=kstart; kk<kend+1; kk++) {
1326                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1327                   cols[cnt++]  = slot + ii + gnx*jj + gnx*gny*kk;
1328                 }
1329               }
1330             }
1331           }
1332 	  ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1333 	}
1334       }
1335     }
1336     ierr = PetscFree(values);CHKERRQ(ierr);
1337     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1338     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1339   }
1340   ierr = PetscFree(cols);CHKERRQ(ierr);
1341   PetscFunctionReturn(0);
1342 }
1343 
1344 #undef __FUNCT__
1345 #define __FUNCT__ "L2GFilterUpperTriangular"
1346 /*
1347   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
1348   identify in the local ordering with periodic domain.
1349 */
1350 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[])
1351 {
1352   PetscErrorCode ierr;
1353   PetscInt       i,n;
1354 
1355   PetscFunctionBegin;
1356   ierr = ISLocalToGlobalMappingApply(ltog,1,row,row);CHKERRQ(ierr);
1357   ierr = ISLocalToGlobalMappingApply(ltog,*cnt,col,col);CHKERRQ(ierr);
1358   for (i=0,n=0; i<*cnt; i++) {
1359     if (col[i] >= *row) col[n++] = col[i];
1360   }
1361   *cnt = n;
1362   PetscFunctionReturn(0);
1363 }
1364 
1365 #undef __FUNCT__
1366 #define __FUNCT__ "DMGetMatrix_DA_2d_MPISBAIJ"
1367 PetscErrorCode DMGetMatrix_DA_2d_MPISBAIJ(DM da,Mat J)
1368 {
1369   PetscErrorCode         ierr;
1370   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1371   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1372   PetscInt               istart,iend,jstart,jend,ii,jj;
1373   MPI_Comm               comm;
1374   PetscScalar            *values;
1375   DMDABoundaryType       bx,by;
1376   DMDAStencilType        st;
1377   ISLocalToGlobalMapping ltog,ltogb;
1378 
1379   PetscFunctionBegin;
1380   /*
1381      nc - number of components per grid point
1382      col - number of colors needed in one direction for single component problem
1383   */
1384   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1385   col = 2*s + 1;
1386 
1387   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1388   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1389   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1390 
1391   ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1392 
1393   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1394   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1395 
1396   /* determine the matrix preallocation information */
1397   ierr = MatPreallocateSymmetricInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1398   for (i=xs; i<xs+nx; i++) {
1399     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1400     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1401     for (j=ys; j<ys+ny; j++) {
1402       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1403       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1404       slot = i - gxs + gnx*(j - gys);
1405 
1406       /* Find block columns in block row */
1407       cnt  = 0;
1408       for (ii=istart; ii<iend+1; ii++) {
1409         for (jj=jstart; jj<jend+1; jj++) {
1410           if (st == DMDA_STENCIL_BOX || !ii || !jj) {
1411             cols[cnt++]  = slot + ii + gnx*jj;
1412           }
1413         }
1414       }
1415       ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1416       ierr = MatPreallocateSymmetricSet(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
1417     }
1418   }
1419   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1420   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1421   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1422 
1423   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1424   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1425 
1426   /*
1427     For each node in the grid: we get the neighbors in the local (on processor ordering
1428     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1429     PETSc ordering.
1430   */
1431   if (!da->prealloc_only) {
1432     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1433     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1434     for (i=xs; i<xs+nx; i++) {
1435       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1436       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1437       for (j=ys; j<ys+ny; j++) {
1438         jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1439         jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1440         slot = i - gxs + gnx*(j - gys);
1441 
1442         /* Find block columns in block row */
1443         cnt  = 0;
1444         for (ii=istart; ii<iend+1; ii++) {
1445           for (jj=jstart; jj<jend+1; jj++) {
1446             if (st == DMDA_STENCIL_BOX || !ii || !jj) {
1447               cols[cnt++]  = slot + ii + gnx*jj;
1448             }
1449           }
1450         }
1451         ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1452 	ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1453       }
1454     }
1455     ierr = PetscFree(values);CHKERRQ(ierr);
1456     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1457     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1458   }
1459   ierr = PetscFree(cols);CHKERRQ(ierr);
1460   PetscFunctionReturn(0);
1461 }
1462 
1463 #undef __FUNCT__
1464 #define __FUNCT__ "DMGetMatrix_DA_3d_MPISBAIJ"
1465 PetscErrorCode DMGetMatrix_DA_3d_MPISBAIJ(DM da,Mat J)
1466 {
1467   PetscErrorCode         ierr;
1468   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1469   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1470   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1471   MPI_Comm               comm;
1472   PetscScalar            *values;
1473   DMDABoundaryType       bx,by,bz;
1474   DMDAStencilType        st;
1475   ISLocalToGlobalMapping ltog,ltogb;
1476 
1477   PetscFunctionBegin;
1478   /*
1479      nc - number of components per grid point
1480      col - number of colors needed in one direction for single component problem
1481   */
1482   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1483   col = 2*s + 1;
1484 
1485   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1486   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1487   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1488 
1489   /* create the matrix */
1490   ierr = PetscMalloc(col*col*col*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1491 
1492   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1493   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1494 
1495   /* determine the matrix preallocation information */
1496   ierr = MatPreallocateSymmetricInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1497   for (i=xs; i<xs+nx; i++) {
1498     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1499     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1500     for (j=ys; j<ys+ny; j++) {
1501       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1502       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1503       for (k=zs; k<zs+nz; k++) {
1504         kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1505 	kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1506 
1507 	slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1508 
1509 	/* Find block columns in block row */
1510 	cnt  = 0;
1511         for (ii=istart; ii<iend+1; ii++) {
1512           for (jj=jstart; jj<jend+1; jj++) {
1513             for (kk=kstart; kk<kend+1; kk++) {
1514               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
1515                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1516               }
1517             }
1518           }
1519         }
1520         ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1521         ierr = MatPreallocateSymmetricSet(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
1522       }
1523     }
1524   }
1525   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1526   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1527   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1528 
1529   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1530   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1531 
1532   /*
1533     For each node in the grid: we get the neighbors in the local (on processor ordering
1534     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1535     PETSc ordering.
1536   */
1537   if (!da->prealloc_only) {
1538     ierr = PetscMalloc(col*col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1539     ierr = PetscMemzero(values,col*col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1540     for (i=xs; i<xs+nx; i++) {
1541       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1542       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1543       for (j=ys; j<ys+ny; j++) {
1544         jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1545 	jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1546 	for (k=zs; k<zs+nz; k++) {
1547           kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1548 	  kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1549 
1550 	  slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1551 
1552 	  cnt  = 0;
1553           for (ii=istart; ii<iend+1; ii++) {
1554             for (jj=jstart; jj<jend+1; jj++) {
1555               for (kk=kstart; kk<kend+1; kk++) {
1556                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
1557 		  cols[cnt++]  = slot + ii + gnx*jj + gnx*gny*kk;
1558 		}
1559 	      }
1560 	    }
1561 	  }
1562           ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1563           ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1564 	}
1565       }
1566     }
1567     ierr = PetscFree(values);CHKERRQ(ierr);
1568     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1569     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1570   }
1571   ierr = PetscFree(cols);CHKERRQ(ierr);
1572   PetscFunctionReturn(0);
1573 }
1574 
1575 /* ---------------------------------------------------------------------------------*/
1576 
1577 #undef __FUNCT__
1578 #define __FUNCT__ "DMGetMatrix_DA_3d_MPIAIJ_Fill"
1579 PetscErrorCode DMGetMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J)
1580 {
1581   PetscErrorCode         ierr;
1582   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1583   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,l,p,*dnz,*onz;
1584   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1585   DM_DA                  *dd = (DM_DA*)da->data;
1586   PetscInt               ifill_col,*dfill = dd->dfill,*ofill = dd->ofill;
1587   MPI_Comm               comm;
1588   PetscScalar            *values;
1589   DMDABoundaryType       bx,by,bz;
1590   ISLocalToGlobalMapping ltog,ltogb;
1591   DMDAStencilType        st;
1592 
1593   PetscFunctionBegin;
1594   /*
1595          nc - number of components per grid point
1596          col - number of colors needed in one direction for single component problem
1597 
1598   */
1599   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1600   col    = 2*s + 1;
1601   if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)){
1602     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
1603                  by 2*stencil_width + 1\n");
1604   }
1605   if (by == DMDA_BOUNDARY_PERIODIC && (n % col)){
1606     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
1607                  by 2*stencil_width + 1\n");
1608   }
1609   if (bz == DMDA_BOUNDARY_PERIODIC && (p % col)){
1610     SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
1611                  by 2*stencil_width + 1\n");
1612   }
1613 
1614   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1615   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1616   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1617 
1618   ierr = PetscMalloc(col*col*col*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1619   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1620   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1621 
1622   /* determine the matrix preallocation information */
1623   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1624 
1625 
1626   for (i=xs; i<xs+nx; i++) {
1627     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1628     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1629     for (j=ys; j<ys+ny; j++) {
1630       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1631       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1632       for (k=zs; k<zs+nz; k++) {
1633         kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1634         kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1635 
1636         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1637 
1638 	for (l=0; l<nc; l++) {
1639 	  cnt  = 0;
1640 	  for (ii=istart; ii<iend+1; ii++) {
1641 	    for (jj=jstart; jj<jend+1; jj++) {
1642 	      for (kk=kstart; kk<kend+1; kk++) {
1643 		if (ii || jj || kk) {
1644 		  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1645 		    for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++)
1646 		      cols[cnt++]  = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1647 		  }
1648 		} else {
1649 		  if (dfill) {
1650 		    for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++)
1651 		      cols[cnt++]  = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1652 		  } else {
1653 		    for (ifill_col=0; ifill_col<nc; ifill_col++)
1654 		      cols[cnt++]  = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1655 		  }
1656 		}
1657 	      }
1658 	    }
1659 	  }
1660 	  row  = l + nc*(slot);
1661 	  ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1662 	}
1663       }
1664     }
1665   }
1666   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1667   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1668   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1669   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1670   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1671 
1672   /*
1673     For each node in the grid: we get the neighbors in the local (on processor ordering
1674     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1675     PETSc ordering.
1676   */
1677   if (!da->prealloc_only) {
1678     ierr = PetscMalloc(col*col*col*nc*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1679     ierr = PetscMemzero(values,col*col*col*nc*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1680     for (i=xs; i<xs+nx; i++) {
1681       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1682       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1683       for (j=ys; j<ys+ny; j++) {
1684 	jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1685 	jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1686 	for (k=zs; k<zs+nz; k++) {
1687 	  kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1688 	  kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1689 
1690 	  slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1691 
1692 	  for (l=0; l<nc; l++) {
1693 	    cnt  = 0;
1694 	    for (ii=istart; ii<iend+1; ii++) {
1695 	      for (jj=jstart; jj<jend+1; jj++) {
1696 		for (kk=kstart; kk<kend+1; kk++) {
1697 		  if (ii || jj || kk) {
1698 		    if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1699 		      for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++)
1700 			cols[cnt++]  = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1701 		    }
1702 		  } else {
1703 		    if (dfill) {
1704 		      for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++)
1705 			cols[cnt++]  = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1706 		    } else {
1707 		      for (ifill_col=0; ifill_col<nc; ifill_col++)
1708 			cols[cnt++]  = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1709 		    }
1710 		  }
1711 		}
1712 	      }
1713 	    }
1714 	    row  = l + nc*(slot);
1715 	    ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1716 	  }
1717 	}
1718       }
1719     }
1720     ierr = PetscFree(values);CHKERRQ(ierr);
1721     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1722     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1723   }
1724   ierr = PetscFree(cols);CHKERRQ(ierr);
1725   PetscFunctionReturn(0);
1726 }
1727