xref: /petsc/src/dm/impls/da/fdda.c (revision 1147fc2a6bf7922defbbca15fc6c33f234803ff0)
1 
2 #include <petsc-private/daimpl.h> /*I      "petscdmda.h"     I*/
3 #include <petscmat.h>
4 #include <petsc-private/matimpl.h>
5 
6 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring *);
7 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring *);
8 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring *);
9 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring *);
10 
11 /*
12    For ghost i that may be negative or greater than the upper bound this
13   maps it into the 0:m-1 range using periodicity
14 */
15 #define SetInRange(i,m) ((i < 0) ? m+i:((i >= m) ? i-m:i))
16 
17 #undef __FUNCT__
18 #define __FUNCT__ "DMDASetBlockFills_Private"
19 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill,PetscInt w,PetscInt **rfill)
20 {
21   PetscErrorCode ierr;
22   PetscInt       i,j,nz,*fill;
23 
24   PetscFunctionBegin;
25   if (!dfill) PetscFunctionReturn(0);
26 
27   /* count number nonzeros */
28   nz = 0;
29   for (i=0; i<w; i++) {
30     for (j=0; j<w; j++) {
31       if (dfill[w*i+j]) nz++;
32     }
33   }
34   ierr = PetscMalloc((nz + w + 1)*sizeof(PetscInt),&fill);CHKERRQ(ierr);
35   /* construct modified CSR storage of nonzero structure */
36   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
37    so fill[1] - fill[0] gives number of nonzeros in first row etc */
38   nz = w + 1;
39   for (i=0; i<w; i++) {
40     fill[i] = nz;
41     for (j=0; j<w; j++) {
42       if (dfill[w*i+j]) {
43         fill[nz] = j;
44         nz++;
45       }
46     }
47   }
48   fill[w] = nz;
49 
50   *rfill = fill;
51   PetscFunctionReturn(0);
52 }
53 
54 #undef __FUNCT__
55 #define __FUNCT__ "DMDASetBlockFills"
56 /*@
57     DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
58     of the matrix returned by DMCreateMatrix().
59 
60     Logically Collective on DMDA
61 
62     Input Parameter:
63 +   da - the distributed array
64 .   dfill - the fill pattern in the diagonal block (may be PETSC_NULL, means use dense block)
65 -   ofill - the fill pattern in the off-diagonal blocks
66 
67 
68     Level: developer
69 
70     Notes: This only makes sense when you are doing multicomponent problems but using the
71        MPIAIJ matrix format
72 
73            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
74        representing coupling and 0 entries for missing coupling. For example
75 $             dfill[9] = {1, 0, 0,
76 $                         1, 1, 0,
77 $                         0, 1, 1}
78        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
79        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
80        diagonal block).
81 
82      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
83      can be represented in the dfill, ofill format
84 
85    Contributed by Glenn Hammond
86 
87 .seealso DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()
88 
89 @*/
90 PetscErrorCode  DMDASetBlockFills(DM da,const PetscInt *dfill,const PetscInt *ofill)
91 {
92   DM_DA          *dd = (DM_DA*)da->data;
93   PetscErrorCode ierr;
94   PetscInt       i,k,cnt = 1;
95 
96   PetscFunctionBegin;
97   ierr = DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);CHKERRQ(ierr);
98   ierr = DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);CHKERRQ(ierr);
99 
100   /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of
101    columns to the left with any nonzeros in them plus 1 */
102   ierr = PetscMalloc(dd->w*sizeof(PetscBool),&dd->ofillcols);CHKERRQ(ierr);
103   ierr = PetscMemzero(dd->ofillcols,dd->w*sizeof(PetscBool));CHKERRQ(ierr);
104   for (i=0; i<dd->w; i++) {
105     for (k=dd->ofill[i]; k<dd->ofill[i+1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
106   }
107   for (i=0; i<dd->w; i++) {
108     if (dd->ofillcols[i]) {
109       dd->ofillcols[i] = cnt++;
110     }
111   }
112   PetscFunctionReturn(0);
113 }
114 
115 
116 #undef __FUNCT__
117 #define __FUNCT__ "DMCreateColoring_DA"
118 PetscErrorCode  DMCreateColoring_DA(DM da,ISColoringType ctype,MatType mtype,ISColoring *coloring)
119 {
120   PetscErrorCode   ierr;
121   PetscInt         dim,m,n,p,nc;
122   DMDABoundaryType bx,by,bz;
123   MPI_Comm         comm;
124   PetscMPIInt      size;
125   PetscBool        isBAIJ;
126   DM_DA            *dd = (DM_DA*)da->data;
127 
128   PetscFunctionBegin;
129   /*
130                                   m
131           ------------------------------------------------------
132          |                                                     |
133          |                                                     |
134          |               ----------------------                |
135          |               |                    |                |
136       n  |           yn  |                    |                |
137          |               |                    |                |
138          |               .---------------------                |
139          |             (xs,ys)     xn                          |
140          |            .                                        |
141          |         (gxs,gys)                                   |
142          |                                                     |
143           -----------------------------------------------------
144   */
145 
146   /*
147          nc - number of components per grid point
148          col - number of colors needed in one direction for single component problem
149 
150   */
151   ierr = DMDAGetInfo(da,&dim,0,0,0,&m,&n,&p,&nc,0,&bx,&by,&bz,0);CHKERRQ(ierr);
152 
153   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
154   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
155   if (ctype == IS_COLORING_GHOSTED) {
156     if (size == 1) {
157       ctype = IS_COLORING_GLOBAL;
158     } else if (dim > 1) {
159       if ((m==1 && bx == DMDA_BOUNDARY_PERIODIC) || (n==1 && by == DMDA_BOUNDARY_PERIODIC) || (p==1 && bz == DMDA_BOUNDARY_PERIODIC)) {
160         SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"IS_COLORING_GHOSTED cannot be used for periodic boundary condition having both ends of the domain  on the same process");
161       }
162     }
163   }
164 
165   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
166      matrices is for the blocks, not the individual matrix elements  */
167   ierr = PetscStrcmp(mtype,MATBAIJ,&isBAIJ);CHKERRQ(ierr);
168   if (!isBAIJ) {ierr = PetscStrcmp(mtype,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);}
169   if (!isBAIJ) {ierr = PetscStrcmp(mtype,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);}
170   if (isBAIJ) {
171     dd->w = 1;
172     dd->xs = dd->xs/nc;
173     dd->xe = dd->xe/nc;
174     dd->Xs = dd->Xs/nc;
175     dd->Xe = dd->Xe/nc;
176   }
177 
178   /*
179      We do not provide a getcoloring function in the DMDA operations because
180    the basic DMDA does not know about matrices. We think of DMDA as being more
181    more low-level then matrices.
182   */
183   if (dim == 1) {
184     ierr = DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
185   } else if (dim == 2) {
186     ierr =  DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
187   } else if (dim == 3) {
188     ierr =  DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
189   } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
190   if (isBAIJ) {
191     dd->w = nc;
192     dd->xs = dd->xs*nc;
193     dd->xe = dd->xe*nc;
194     dd->Xs = dd->Xs*nc;
195     dd->Xe = dd->Xe*nc;
196   }
197   PetscFunctionReturn(0);
198 }
199 
200 /* ---------------------------------------------------------------------------------*/
201 
202 #undef __FUNCT__
203 #define __FUNCT__ "DMCreateColoring_DA_2d_MPIAIJ"
204 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
205 {
206   PetscErrorCode   ierr;
207   PetscInt         xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
208   PetscInt         ncolors;
209   MPI_Comm         comm;
210   DMDABoundaryType bx,by;
211   DMDAStencilType  st;
212   ISColoringValue  *colors;
213   DM_DA            *dd = (DM_DA*)da->data;
214 
215   PetscFunctionBegin;
216   /*
217          nc - number of components per grid point
218          col - number of colors needed in one direction for single component problem
219 
220   */
221   ierr = DMDAGetInfo(da,&dim,&m,&n,0,&M,&N,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
222   col    = 2*s + 1;
223   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
224   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
225   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
226 
227   /* special case as taught to us by Paul Hovland */
228   if (st == DMDA_STENCIL_STAR && s == 1) {
229     ierr = DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
230   } else {
231 
232     if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)) SETERRQ2(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X (%d) is divisible\n\
233                                                             by 2*stencil_width + 1 (%d)\n", m, col);
234     if (by == DMDA_BOUNDARY_PERIODIC && (n % col)) SETERRQ2(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y (%d) is divisible\n\
235                                                             by 2*stencil_width + 1 (%d)\n", n, col);
236     if (ctype == IS_COLORING_GLOBAL) {
237       if (!dd->localcoloring) {
238         ierr = PetscMalloc(nc*nx*ny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
239         ii = 0;
240         for (j=ys; j<ys+ny; j++) {
241           for (i=xs; i<xs+nx; i++) {
242             for (k=0; k<nc; k++) {
243               colors[ii++] = k + nc*((i % col) + col*(j % col));
244             }
245           }
246         }
247         ncolors = nc + nc*(col-1 + col*(col-1));
248         ierr = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,&dd->localcoloring);CHKERRQ(ierr);
249       }
250       *coloring = dd->localcoloring;
251     } else if (ctype == IS_COLORING_GHOSTED) {
252       if (!dd->ghostedcoloring) {
253         ierr = PetscMalloc(nc*gnx*gny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
254         ii = 0;
255         for (j=gys; j<gys+gny; j++) {
256           for (i=gxs; i<gxs+gnx; i++) {
257             for (k=0; k<nc; k++) {
258               /* the complicated stuff is to handle periodic boundaries */
259               colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
260             }
261           }
262         }
263         ncolors = nc + nc*(col - 1 + col*(col-1));
264         ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
265         /* PetscIntView(ncolors,(PetscInt *)colors,0); */
266 
267         ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
268       }
269       *coloring = dd->ghostedcoloring;
270     } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
271   }
272   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
273   PetscFunctionReturn(0);
274 }
275 
276 /* ---------------------------------------------------------------------------------*/
277 
278 #undef __FUNCT__
279 #define __FUNCT__ "DMCreateColoring_DA_3d_MPIAIJ"
280 PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
281 {
282   PetscErrorCode    ierr;
283   PetscInt          xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
284   PetscInt          ncolors;
285   MPI_Comm          comm;
286   DMDABoundaryType  bx,by,bz;
287   DMDAStencilType   st;
288   ISColoringValue   *colors;
289   DM_DA             *dd = (DM_DA*)da->data;
290 
291   PetscFunctionBegin;
292   /*
293          nc - number of components per grid point
294          col - number of colors needed in one direction for single component problem
295 
296   */
297   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
298   col    = 2*s + 1;
299   if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)) SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
300                                                          by 2*stencil_width + 1\n");
301   if (by == DMDA_BOUNDARY_PERIODIC && (n % col)) SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
302                                                          by 2*stencil_width + 1\n");
303   if (bz == DMDA_BOUNDARY_PERIODIC && (p % col)) SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
304                                                          by 2*stencil_width + 1\n");
305 
306   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
307   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
308   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
309 
310   /* create the coloring */
311   if (ctype == IS_COLORING_GLOBAL) {
312     if (!dd->localcoloring) {
313       ierr = PetscMalloc(nc*nx*ny*nz*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
314       ii = 0;
315       for (k=zs; k<zs+nz; k++) {
316         for (j=ys; j<ys+ny; j++) {
317           for (i=xs; i<xs+nx; i++) {
318             for (l=0; l<nc; l++) {
319               colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
320             }
321           }
322         }
323       }
324       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
325       ierr = ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,&dd->localcoloring);CHKERRQ(ierr);
326     }
327     *coloring = dd->localcoloring;
328   } else if (ctype == IS_COLORING_GHOSTED) {
329     if (!dd->ghostedcoloring) {
330       ierr = PetscMalloc(nc*gnx*gny*gnz*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
331       ii = 0;
332       for (k=gzs; k<gzs+gnz; k++) {
333         for (j=gys; j<gys+gny; j++) {
334           for (i=gxs; i<gxs+gnx; i++) {
335             for (l=0; l<nc; l++) {
336               /* the complicated stuff is to handle periodic boundaries */
337               colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
338             }
339           }
340         }
341       }
342       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
343       ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
344       ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
345     }
346     *coloring = dd->ghostedcoloring;
347   } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
348   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
349   PetscFunctionReturn(0);
350 }
351 
352 /* ---------------------------------------------------------------------------------*/
353 
354 #undef __FUNCT__
355 #define __FUNCT__ "DMCreateColoring_DA_1d_MPIAIJ"
356 PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
357 {
358   PetscErrorCode    ierr;
359   PetscInt          xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
360   PetscInt          ncolors;
361   MPI_Comm          comm;
362   DMDABoundaryType  bx;
363   ISColoringValue   *colors;
364   DM_DA             *dd = (DM_DA*)da->data;
365 
366   PetscFunctionBegin;
367   /*
368          nc - number of components per grid point
369          col - number of colors needed in one direction for single component problem
370 
371   */
372   ierr = DMDAGetInfo(da,&dim,&m,0,0,&M,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
373   col    = 2*s + 1;
374 
375   if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)) SETERRQ2(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points %d is divisible\n\
376                                                           by 2*stencil_width + 1 %d\n",(int)m,(int)col);
377 
378   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
379   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
380   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
381 
382   /* create the coloring */
383   if (ctype == IS_COLORING_GLOBAL) {
384     if (!dd->localcoloring) {
385       ierr = PetscMalloc(nc*nx*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
386       if (dd->ofillcols) {
387         PetscInt tc = 0;
388         for (i=0; i<nc; i++) tc += (PetscInt) (dd->ofillcols[i] > 0);
389         i1 = 0;
390         for (i=xs; i<xs+nx; i++) {
391           for (l=0; l<nc; l++) {
392             if (dd->ofillcols[l] && (i % col)) {
393               colors[i1++] =  nc - 1 + tc*((i % col) - 1) + dd->ofillcols[l];
394             } else {
395               colors[i1++] = l;
396             }
397           }
398         }
399         ncolors = nc + 2*s*tc;
400       } else {
401         i1 = 0;
402         for (i=xs; i<xs+nx; i++) {
403           for (l=0; l<nc; l++) {
404             colors[i1++] = l + nc*(i % col);
405           }
406         }
407         ncolors = nc + nc*(col-1);
408       }
409       ierr = ISColoringCreate(comm,ncolors,nc*nx,colors,&dd->localcoloring);CHKERRQ(ierr);
410     }
411     *coloring = dd->localcoloring;
412   } else if (ctype == IS_COLORING_GHOSTED) {
413     if (!dd->ghostedcoloring) {
414       ierr = PetscMalloc(nc*gnx*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
415       i1 = 0;
416       for (i=gxs; i<gxs+gnx; i++) {
417         for (l=0; l<nc; l++) {
418           /* the complicated stuff is to handle periodic boundaries */
419           colors[i1++] = l + nc*(SetInRange(i,m) % col);
420         }
421       }
422       ncolors = nc + nc*(col-1);
423       ierr = ISColoringCreate(comm,ncolors,nc*gnx,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
424       ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
425     }
426     *coloring = dd->ghostedcoloring;
427   } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
428   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
429   PetscFunctionReturn(0);
430 }
431 
432 #undef __FUNCT__
433 #define __FUNCT__ "DMCreateColoring_DA_2d_5pt_MPIAIJ"
434 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
435 {
436   PetscErrorCode    ierr;
437   PetscInt          xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
438   PetscInt          ncolors;
439   MPI_Comm          comm;
440   DMDABoundaryType  bx,by;
441   ISColoringValue   *colors;
442   DM_DA             *dd = (DM_DA*)da->data;
443 
444   PetscFunctionBegin;
445   /*
446          nc - number of components per grid point
447          col - number of colors needed in one direction for single component problem
448 
449   */
450   ierr   = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,0);CHKERRQ(ierr);
451   ierr   = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
452   ierr   = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
453   ierr   = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
454 
455   if (bx == DMDA_BOUNDARY_PERIODIC && (m % 5)) SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible by 5\n");
456   if (by == DMDA_BOUNDARY_PERIODIC && (n % 5)) SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible by 5\n");
457 
458   /* create the coloring */
459   if (ctype == IS_COLORING_GLOBAL) {
460     if (!dd->localcoloring) {
461       ierr = PetscMalloc(nc*nx*ny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
462       ii = 0;
463       for (j=ys; j<ys+ny; j++) {
464         for (i=xs; i<xs+nx; i++) {
465           for (k=0; k<nc; k++) {
466             colors[ii++] = k + nc*((3*j+i) % 5);
467           }
468         }
469       }
470       ncolors = 5*nc;
471       ierr = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,&dd->localcoloring);CHKERRQ(ierr);
472     }
473     *coloring = dd->localcoloring;
474   } else if (ctype == IS_COLORING_GHOSTED) {
475     if (!dd->ghostedcoloring) {
476       ierr = PetscMalloc(nc*gnx*gny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
477       ii = 0;
478       for (j=gys; j<gys+gny; j++) {
479         for (i=gxs; i<gxs+gnx; i++) {
480           for (k=0; k<nc; k++) {
481             colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
482           }
483         }
484       }
485       ncolors = 5*nc;
486       ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
487       ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
488     }
489     *coloring = dd->ghostedcoloring;
490   } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
491   PetscFunctionReturn(0);
492 }
493 
494 /* =========================================================================== */
495 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat);
496 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM,Mat);
497 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat);
498 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat);
499 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat);
500 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat);
501 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat);
502 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat);
503 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat);
504 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat);
505 
506 #undef __FUNCT__
507 #define __FUNCT__ "MatSetupDM"
508 /*@C
509    MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix
510 
511    Logically Collective on Mat
512 
513    Input Parameters:
514 +  mat - the matrix
515 -  da - the da
516 
517    Level: intermediate
518 
519 @*/
520 PetscErrorCode MatSetupDM(Mat mat,DM da)
521 {
522   PetscErrorCode ierr;
523 
524   PetscFunctionBegin;
525   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
526   PetscValidHeaderSpecific(da,DM_CLASSID,1);
527   ierr = PetscTryMethod(mat,"MatSetupDM_C",(Mat,DM),(mat,da));CHKERRQ(ierr);
528   PetscFunctionReturn(0);
529 }
530 
531 EXTERN_C_BEGIN
532 #undef __FUNCT__
533 #define __FUNCT__ "MatView_MPI_DA"
534 PetscErrorCode  MatView_MPI_DA(Mat A,PetscViewer viewer)
535 {
536   DM             da;
537   PetscErrorCode ierr;
538   const char     *prefix;
539   Mat            Anatural;
540   AO             ao;
541   PetscInt       rstart,rend,*petsc,i;
542   IS             is;
543   MPI_Comm       comm;
544   PetscViewerFormat format;
545 
546   PetscFunctionBegin;
547   /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
548   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
549   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0);
550 
551   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
552   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
553   if (!da) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
554 
555   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
556   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
557   ierr = PetscMalloc((rend-rstart)*sizeof(PetscInt),&petsc);CHKERRQ(ierr);
558   for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
559   ierr = AOApplicationToPetsc(ao,rend-rstart,petsc);CHKERRQ(ierr);
560   ierr = ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
561 
562   /* call viewer on natural ordering */
563   ierr = MatGetSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);CHKERRQ(ierr);
564   ierr = ISDestroy(&is);CHKERRQ(ierr);
565   ierr = PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);CHKERRQ(ierr);
566   ierr = PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);CHKERRQ(ierr);
567   ierr = PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);CHKERRQ(ierr);
568   ierr = MatView(Anatural,viewer);CHKERRQ(ierr);
569   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
570   PetscFunctionReturn(0);
571 }
572 EXTERN_C_END
573 
574 EXTERN_C_BEGIN
575 #undef __FUNCT__
576 #define __FUNCT__ "MatLoad_MPI_DA"
577 PetscErrorCode  MatLoad_MPI_DA(Mat A,PetscViewer viewer)
578 {
579   DM             da;
580   PetscErrorCode ierr;
581   Mat            Anatural,Aapp;
582   AO             ao;
583   PetscInt       rstart,rend,*app,i;
584   IS             is;
585   MPI_Comm       comm;
586 
587   PetscFunctionBegin;
588   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
589   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
590   if (!da) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
591 
592   /* Load the matrix in natural ordering */
593   ierr = MatCreate(((PetscObject)A)->comm,&Anatural);CHKERRQ(ierr);
594   ierr = MatSetType(Anatural,((PetscObject)A)->type_name);CHKERRQ(ierr);
595   ierr = MatSetSizes(Anatural,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
596   ierr = MatLoad(Anatural,viewer);CHKERRQ(ierr);
597 
598   /* Map natural ordering to application ordering and create IS */
599   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
600   ierr = MatGetOwnershipRange(Anatural,&rstart,&rend);CHKERRQ(ierr);
601   ierr = PetscMalloc((rend-rstart)*sizeof(PetscInt),&app);CHKERRQ(ierr);
602   for (i=rstart; i<rend; i++) app[i-rstart] = i;
603   ierr = AOPetscToApplication(ao,rend-rstart,app);CHKERRQ(ierr);
604   ierr = ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
605 
606   /* Do permutation and replace header */
607   ierr = MatGetSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);CHKERRQ(ierr);
608   ierr = MatHeaderReplace(A,Aapp);CHKERRQ(ierr);
609   ierr = ISDestroy(&is);CHKERRQ(ierr);
610   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
611   PetscFunctionReturn(0);
612 }
613 EXTERN_C_END
614 
615 #undef __FUNCT__
616 #define __FUNCT__ "DMCreateMatrix_DA"
617 PetscErrorCode DMCreateMatrix_DA(DM da, MatType mtype,Mat *J)
618 {
619   PetscErrorCode ierr;
620   PetscInt       dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
621   Mat            A;
622   MPI_Comm       comm;
623   MatType        Atype;
624   PetscSection   section, sectionGlobal;
625   void           (*aij)(void)=PETSC_NULL,(*baij)(void)=PETSC_NULL,(*sbaij)(void)=PETSC_NULL;
626   MatType        ttype[256];
627   PetscBool      flg;
628   PetscMPIInt    size;
629   DM_DA          *dd = (DM_DA*)da->data;
630 
631   PetscFunctionBegin;
632 #if !defined(PETSC_USE_DYNAMIC_LIBRARIES)
633   ierr = MatInitializePackage(PETSC_NULL);CHKERRQ(ierr);
634 #endif
635   if (!mtype) mtype = MATAIJ;
636   ierr = PetscStrcpy((char*)ttype,mtype);CHKERRQ(ierr);
637   ierr = PetscOptionsBegin(((PetscObject)da)->comm,((PetscObject)da)->prefix,"DMDA options","Mat");CHKERRQ(ierr);
638   ierr = PetscOptionsList("-dm_mat_type","Matrix type","MatSetType",MatList,mtype,(char*)ttype,256,&flg);CHKERRQ(ierr);
639   ierr = PetscOptionsEnd();
640 
641   ierr = DMGetDefaultSection(da, &section);CHKERRQ(ierr);
642   if (section) {
643     PetscInt  bs = -1;
644     PetscInt  localSize;
645     PetscBool isShell, isBlock, isSeqBlock, isMPIBlock, isSymBlock, isSymSeqBlock, isSymMPIBlock, isSymmetric;
646 
647     ierr = DMGetDefaultGlobalSection(da, &sectionGlobal);CHKERRQ(ierr);
648     ierr = PetscSectionGetConstrainedStorageSize(sectionGlobal, &localSize);CHKERRQ(ierr);
649     ierr = MatCreate(((PetscObject) da)->comm, J);CHKERRQ(ierr);
650     ierr = MatSetSizes(*J, localSize, localSize, PETSC_DETERMINE, PETSC_DETERMINE);CHKERRQ(ierr);
651     ierr = MatSetType(*J, mtype);CHKERRQ(ierr);
652     ierr = MatSetFromOptions(*J);CHKERRQ(ierr);
653     ierr = PetscStrcmp(mtype, MATSHELL, &isShell);CHKERRQ(ierr);
654     ierr = PetscStrcmp(mtype, MATBAIJ, &isBlock);CHKERRQ(ierr);
655     ierr = PetscStrcmp(mtype, MATSEQBAIJ, &isSeqBlock);CHKERRQ(ierr);
656     ierr = PetscStrcmp(mtype, MATMPIBAIJ, &isMPIBlock);CHKERRQ(ierr);
657     ierr = PetscStrcmp(mtype, MATSBAIJ, &isSymBlock);CHKERRQ(ierr);
658     ierr = PetscStrcmp(mtype, MATSEQSBAIJ, &isSymSeqBlock);CHKERRQ(ierr);
659     ierr = PetscStrcmp(mtype, MATMPISBAIJ, &isSymMPIBlock);CHKERRQ(ierr);
660     /* Check for symmetric storage */
661     isSymmetric = (PetscBool) (isSymBlock || isSymSeqBlock || isSymMPIBlock);
662     if (isSymmetric) {
663       ierr = MatSetOption(*J, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE);CHKERRQ(ierr);
664     }
665     if (!isShell) {
666       PetscInt *dnz, *onz, *dnzu, *onzu, bsLocal;
667 
668       if (bs < 0) {
669         if (isBlock || isSeqBlock || isMPIBlock || isSymBlock || isSymSeqBlock || isSymMPIBlock) {
670           PetscInt pStart, pEnd, p, dof;
671 
672           ierr = PetscSectionGetChart(sectionGlobal, &pStart, &pEnd);CHKERRQ(ierr);
673           for (p = pStart; p < pEnd; ++p) {
674             ierr = PetscSectionGetDof(sectionGlobal, p, &dof);CHKERRQ(ierr);
675             if (dof) {
676               bs = dof;
677               break;
678             }
679           }
680         } else {
681           bs = 1;
682         }
683         /* Must have same blocksize on all procs (some might have no points) */
684         bsLocal = bs;
685         ierr = MPI_Allreduce(&bsLocal, &bs, 1, MPIU_INT, MPI_MAX, ((PetscObject) da)->comm);CHKERRQ(ierr);
686       }
687       ierr = PetscMalloc4(localSize/bs, PetscInt, &dnz, localSize/bs, PetscInt, &onz, localSize/bs, PetscInt, &dnzu, localSize/bs, PetscInt, &onzu);CHKERRQ(ierr);
688       ierr = PetscMemzero(dnz,  localSize/bs * sizeof(PetscInt));CHKERRQ(ierr);
689       ierr = PetscMemzero(onz,  localSize/bs * sizeof(PetscInt));CHKERRQ(ierr);
690       ierr = PetscMemzero(dnzu, localSize/bs * sizeof(PetscInt));CHKERRQ(ierr);
691       ierr = PetscMemzero(onzu, localSize/bs * sizeof(PetscInt));CHKERRQ(ierr);
692       /* ierr = DMPlexPreallocateOperator(dm, bs, section, sectionGlobal, dnz, onz, dnzu, onzu, *J, fillMatrix);CHKERRQ(ierr); */
693       ierr = PetscFree4(dnz, onz, dnzu, onzu);CHKERRQ(ierr);
694     }
695   }
696   /*
697                                   m
698           ------------------------------------------------------
699          |                                                     |
700          |                                                     |
701          |               ----------------------                |
702          |               |                    |                |
703       n  |           ny  |                    |                |
704          |               |                    |                |
705          |               .---------------------                |
706          |             (xs,ys)     nx                          |
707          |            .                                        |
708          |         (gxs,gys)                                   |
709          |                                                     |
710           -----------------------------------------------------
711   */
712 
713   /*
714          nc - number of components per grid point
715          col - number of colors needed in one direction for single component problem
716 
717   */
718   M = dd->M;
719   N = dd->N;
720   P = dd->P;
721   dim = dd->dim;
722   dof = dd->w;
723   /* ierr = DMDAGetInfo(da,&dim,&M,&N,&P,0,0,0,&dof,0,0,0,0,0);CHKERRQ(ierr); */
724   ierr = DMDAGetCorners(da,0,0,0,&nx,&ny,&nz);CHKERRQ(ierr);
725   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
726   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
727   ierr = MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);CHKERRQ(ierr);
728   ierr = MatSetType(A,(MatType)ttype);CHKERRQ(ierr);
729   ierr = MatSetDM(A,da);CHKERRQ(ierr);
730   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
731   ierr = MatGetType(A,&Atype);CHKERRQ(ierr);
732   /*
733      We do not provide a getmatrix function in the DMDA operations because
734    the basic DMDA does not know about matrices. We think of DMDA as being more
735    more low-level than matrices. This is kind of cheating but, cause sometimes
736    we think of DMDA has higher level than matrices.
737 
738      We could switch based on Atype (or mtype), but we do not since the
739    specialized setting routines depend only the particular preallocation
740    details of the matrix, not the type itself.
741   */
742   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
743   if (!aij) {
744     ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
745   }
746   if (!aij) {
747     ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
748     if (!baij) {
749       ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
750     }
751     if (!baij) {
752       ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
753       if (!sbaij) {
754         ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
755       }
756     }
757   }
758   if (aij) {
759     if (dim == 1) {
760       if (dd->ofill) {
761         ierr = DMCreateMatrix_DA_1d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
762       } else {
763         ierr = DMCreateMatrix_DA_1d_MPIAIJ(da,A);CHKERRQ(ierr);
764       }
765     } else if (dim == 2) {
766       if (dd->ofill) {
767         ierr = DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
768       } else {
769         ierr = DMCreateMatrix_DA_2d_MPIAIJ(da,A);CHKERRQ(ierr);
770       }
771     } else if (dim == 3) {
772       if (dd->ofill) {
773         ierr = DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
774       } else {
775         ierr = DMCreateMatrix_DA_3d_MPIAIJ(da,A);CHKERRQ(ierr);
776       }
777     }
778   } else if (baij) {
779     if (dim == 2) {
780       ierr = DMCreateMatrix_DA_2d_MPIBAIJ(da,A);CHKERRQ(ierr);
781     } else if (dim == 3) {
782       ierr = DMCreateMatrix_DA_3d_MPIBAIJ(da,A);CHKERRQ(ierr);
783     } else  SETERRQ3(((PetscObject)da)->comm,PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
784   } else if (sbaij) {
785     if (dim == 2) {
786       ierr = DMCreateMatrix_DA_2d_MPISBAIJ(da,A);CHKERRQ(ierr);
787     } else if (dim == 3) {
788       ierr = DMCreateMatrix_DA_3d_MPISBAIJ(da,A);CHKERRQ(ierr);
789     } else SETERRQ3(((PetscObject)da)->comm,PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
790   } else {
791     ISLocalToGlobalMapping ltog,ltogb;
792     ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
793     ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
794     ierr = MatSetUp(A);CHKERRQ(ierr);
795     ierr = MatSetLocalToGlobalMapping(A,ltog,ltog);CHKERRQ(ierr);
796     ierr = MatSetLocalToGlobalMappingBlock(A,ltogb,ltogb);CHKERRQ(ierr);
797   }
798   ierr = DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);CHKERRQ(ierr);
799   ierr = MatSetStencil(A,dim,dims,starts,dof);CHKERRQ(ierr);
800   ierr = MatSetDM(A,da);CHKERRQ(ierr);
801   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
802   if (size > 1) {
803     /* change viewer to display matrix in natural ordering */
804     ierr = MatShellSetOperation(A, MATOP_VIEW, (void (*)(void)) MatView_MPI_DA);CHKERRQ(ierr);
805     ierr = MatShellSetOperation(A, MATOP_LOAD, (void (*)(void)) MatLoad_MPI_DA);CHKERRQ(ierr);
806   }
807   *J = A;
808   PetscFunctionReturn(0);
809 }
810 
811 /* ---------------------------------------------------------------------------------*/
812 #undef __FUNCT__
813 #define __FUNCT__ "DMCreateMatrix_DA_2d_MPIAIJ"
814 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J)
815 {
816   PetscErrorCode         ierr;
817   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = PETSC_NULL,k,nc,*rows = PETSC_NULL,col,cnt,l,p;
818   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
819   MPI_Comm               comm;
820   PetscScalar            *values;
821   DMDABoundaryType       bx,by;
822   ISLocalToGlobalMapping ltog,ltogb;
823   DMDAStencilType        st;
824 
825   PetscFunctionBegin;
826   /*
827          nc - number of components per grid point
828          col - number of colors needed in one direction for single component problem
829 
830   */
831   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
832   col = 2*s + 1;
833   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
834   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
835   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
836 
837   ierr = PetscMalloc2(nc,PetscInt,&rows,col*col*nc*nc,PetscInt,&cols);CHKERRQ(ierr);
838   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
839   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
840 
841   /* determine the matrix preallocation information */
842   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
843   for (i=xs; i<xs+nx; i++) {
844 
845     pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
846     pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
847 
848     for (j=ys; j<ys+ny; j++) {
849       slot = i - gxs + gnx*(j - gys);
850 
851       lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
852       lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
853 
854       cnt  = 0;
855       for (k=0; k<nc; k++) {
856         for (l=lstart; l<lend+1; l++) {
857           for (p=pstart; p<pend+1; p++) {
858             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
859               cols[cnt++]  = k + nc*(slot + gnx*l + p);
860             }
861           }
862         }
863         rows[k] = k + nc*(slot);
864       }
865       ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
866     }
867   }
868   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
869   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
870   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
871   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
872 
873   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
874   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
875 
876   /*
877     For each node in the grid: we get the neighbors in the local (on processor ordering
878     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
879     PETSc ordering.
880   */
881   if (!da->prealloc_only) {
882     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
883     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
884     for (i=xs; i<xs+nx; i++) {
885 
886       pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
887       pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
888 
889       for (j=ys; j<ys+ny; j++) {
890         slot = i - gxs + gnx*(j - gys);
891 
892         lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
893         lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
894 
895         cnt  = 0;
896         for (k=0; k<nc; k++) {
897           for (l=lstart; l<lend+1; l++) {
898             for (p=pstart; p<pend+1; p++) {
899               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
900                 cols[cnt++]  = k + nc*(slot + gnx*l + p);
901               }
902             }
903           }
904           rows[k]      = k + nc*(slot);
905         }
906         ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
907       }
908     }
909     ierr = PetscFree(values);CHKERRQ(ierr);
910     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
911     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
912   }
913   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
914   PetscFunctionReturn(0);
915 }
916 
917 #undef __FUNCT__
918 #define __FUNCT__ "DMCreateMatrix_DA_2d_MPIAIJ_Fill"
919 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)
920 {
921   PetscErrorCode         ierr;
922   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
923   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,l,p;
924   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
925   DM_DA                  *dd = (DM_DA*)da->data;
926   PetscInt               ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
927   MPI_Comm               comm;
928   PetscScalar            *values;
929   DMDABoundaryType       bx,by;
930   ISLocalToGlobalMapping ltog,ltogb;
931   DMDAStencilType        st;
932 
933   PetscFunctionBegin;
934   /*
935          nc - number of components per grid point
936          col - number of colors needed in one direction for single component problem
937 
938   */
939   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
940   col = 2*s + 1;
941   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
942   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
943   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
944 
945   ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
946   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
947   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
948 
949   /* determine the matrix preallocation information */
950   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
951   for (i=xs; i<xs+nx; i++) {
952 
953     pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
954     pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
955 
956     for (j=ys; j<ys+ny; j++) {
957       slot = i - gxs + gnx*(j - gys);
958 
959       lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
960       lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
961 
962       for (k=0; k<nc; k++) {
963         cnt  = 0;
964         for (l=lstart; l<lend+1; l++) {
965           for (p=pstart; p<pend+1; p++) {
966             if (l || p) {
967               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
968                 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++)
969                   cols[cnt++]  = ofill[ifill_col] + nc*(slot + gnx*l + p);
970               }
971             } else {
972               if (dfill) {
973                 for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++)
974                   cols[cnt++]  = dfill[ifill_col] + nc*(slot + gnx*l + p);
975               } else {
976                 for (ifill_col=0; ifill_col<nc; ifill_col++)
977                   cols[cnt++]  = ifill_col + nc*(slot + gnx*l + p);
978               }
979             }
980           }
981         }
982         row = k + nc*(slot);
983         ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
984       }
985     }
986   }
987   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
988   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
989   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
990   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
991   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
992 
993   /*
994     For each node in the grid: we get the neighbors in the local (on processor ordering
995     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
996     PETSc ordering.
997   */
998   if (!da->prealloc_only) {
999     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1000     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1001     for (i=xs; i<xs+nx; i++) {
1002 
1003       pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1004       pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1005 
1006       for (j=ys; j<ys+ny; j++) {
1007         slot = i - gxs + gnx*(j - gys);
1008 
1009         lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1010         lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1011 
1012         for (k=0; k<nc; k++) {
1013           cnt  = 0;
1014           for (l=lstart; l<lend+1; l++) {
1015             for (p=pstart; p<pend+1; p++) {
1016               if (l || p) {
1017                 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
1018                   for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++)
1019                     cols[cnt++]  = ofill[ifill_col] + nc*(slot + gnx*l + p);
1020                 }
1021               } else {
1022                 if (dfill) {
1023                   for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++)
1024                     cols[cnt++]  = dfill[ifill_col] + nc*(slot + gnx*l + p);
1025                 } else {
1026                   for (ifill_col=0; ifill_col<nc; ifill_col++)
1027                     cols[cnt++]  = ifill_col + nc*(slot + gnx*l + p);
1028                 }
1029               }
1030             }
1031           }
1032           row  = k + nc*(slot);
1033           ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1034         }
1035       }
1036     }
1037     ierr = PetscFree(values);CHKERRQ(ierr);
1038     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1039     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1040   }
1041   ierr = PetscFree(cols);CHKERRQ(ierr);
1042   PetscFunctionReturn(0);
1043 }
1044 
1045 /* ---------------------------------------------------------------------------------*/
1046 
1047 #undef __FUNCT__
1048 #define __FUNCT__ "DMCreateMatrix_DA_3d_MPIAIJ"
1049 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J)
1050 {
1051   PetscErrorCode         ierr;
1052   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1053   PetscInt               m,n,dim,s,*cols = PETSC_NULL,k,nc,*rows = PETSC_NULL,col,cnt,l,p,*dnz = PETSC_NULL,*onz = PETSC_NULL;
1054   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1055   MPI_Comm               comm;
1056   PetscScalar            *values;
1057   DMDABoundaryType       bx,by,bz;
1058   ISLocalToGlobalMapping ltog,ltogb;
1059   DMDAStencilType        st;
1060 
1061   PetscFunctionBegin;
1062   /*
1063          nc - number of components per grid point
1064          col - number of colors needed in one direction for single component problem
1065 
1066   */
1067   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1068   col    = 2*s + 1;
1069 
1070   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1071   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1072   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1073 
1074   ierr = PetscMalloc2(nc,PetscInt,&rows,col*col*col*nc*nc,PetscInt,&cols);CHKERRQ(ierr);
1075   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1076   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1077 
1078   /* determine the matrix preallocation information */
1079   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1080   for (i=xs; i<xs+nx; i++) {
1081     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1082     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1083     for (j=ys; j<ys+ny; j++) {
1084       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1085       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1086       for (k=zs; k<zs+nz; k++) {
1087         kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1088         kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1089 
1090         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1091 
1092         cnt  = 0;
1093         for (l=0; l<nc; l++) {
1094           for (ii=istart; ii<iend+1; ii++) {
1095             for (jj=jstart; jj<jend+1; jj++) {
1096               for (kk=kstart; kk<kend+1; kk++) {
1097                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1098                   cols[cnt++]  = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1099                 }
1100               }
1101             }
1102           }
1103           rows[l] = l + nc*(slot);
1104         }
1105         ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1106       }
1107     }
1108   }
1109   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1110   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1111   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1112   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1113   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1114   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1115 
1116   /*
1117     For each node in the grid: we get the neighbors in the local (on processor ordering
1118     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1119     PETSc ordering.
1120   */
1121   if (!da->prealloc_only) {
1122     ierr = PetscMalloc(col*col*col*nc*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1123     ierr = PetscMemzero(values,col*col*col*nc*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1124     for (i=xs; i<xs+nx; i++) {
1125       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1126       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1127       for (j=ys; j<ys+ny; j++) {
1128         jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1129         jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1130         for (k=zs; k<zs+nz; k++) {
1131           kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1132           kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1133 
1134           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1135 
1136           cnt  = 0;
1137           for (l=0; l<nc; l++) {
1138             for (ii=istart; ii<iend+1; ii++) {
1139               for (jj=jstart; jj<jend+1; jj++) {
1140                 for (kk=kstart; kk<kend+1; kk++) {
1141                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1142                     cols[cnt++]  = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1143                   }
1144                 }
1145               }
1146             }
1147             rows[l]      = l + nc*(slot);
1148           }
1149           ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1150         }
1151       }
1152     }
1153     ierr = PetscFree(values);CHKERRQ(ierr);
1154     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1155     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1156   }
1157   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1158   PetscFunctionReturn(0);
1159 }
1160 
1161 /* ---------------------------------------------------------------------------------*/
1162 
1163 #undef __FUNCT__
1164 #define __FUNCT__ "DMCreateMatrix_DA_1d_MPIAIJ_Fill"
1165 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J)
1166 {
1167   PetscErrorCode         ierr;
1168   DM_DA                  *dd = (DM_DA*)da->data;
1169   PetscInt               xs,nx,i,j,gxs,gnx,row,k,l;
1170   PetscInt               m,dim,s,*cols = PETSC_NULL,nc,col,cnt,*ocols;
1171   PetscInt               *ofill = dd->ofill;
1172   PetscScalar            *values;
1173   DMDABoundaryType       bx;
1174   ISLocalToGlobalMapping ltog,ltogb;
1175   PetscMPIInt            rank,size;
1176 
1177   PetscFunctionBegin;
1178   if (dd->bx == DMDA_BOUNDARY_PERIODIC) SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"With fill provided not implemented with periodic boundary conditions");
1179   ierr = MPI_Comm_rank(((PetscObject)da)->comm,&rank);CHKERRQ(ierr);
1180   ierr = MPI_Comm_size(((PetscObject)da)->comm,&size);CHKERRQ(ierr);
1181 
1182   /*
1183          nc - number of components per grid point
1184          col - number of colors needed in one direction for single component problem
1185 
1186   */
1187   ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
1188   col    = 2*s + 1;
1189 
1190   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
1191   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
1192 
1193   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1194   ierr = PetscMalloc2(nx*nc,PetscInt,&cols,nx*nc,PetscInt,&ocols);CHKERRQ(ierr);
1195   ierr = PetscMemzero(cols,nx*nc*sizeof(PetscInt));CHKERRQ(ierr);
1196   ierr = PetscMemzero(ocols,nx*nc*sizeof(PetscInt));CHKERRQ(ierr);
1197 
1198   /*
1199         note should be smaller for first and last process with no periodic
1200         does not handle dfill
1201   */
1202   cnt  = 0;
1203   /* coupling with process to the left */
1204   for (i=0; i<s; i++) {
1205     for (j=0; j<nc; j++) {
1206       ocols[cnt] = ((!rank) ? 0 : (s - i)*(ofill[j+1] - ofill[j]));
1207       cols[cnt]  = nc + (s + i)*(ofill[j+1] - ofill[j]);
1208       cnt++;
1209     }
1210   }
1211   for (i=s; i<nx-s; i++) {
1212     for (j=0; j<nc; j++) {
1213       cols[cnt]  = nc + 2*s*(ofill[j+1] - ofill[j]);
1214       cnt++;
1215     }
1216   }
1217  /* coupling with process to the right */
1218   for (i=nx-s; i<nx; i++) {
1219     for (j=0; j<nc; j++) {
1220       ocols[cnt] = ((rank == (size-1)) ? 0 : (i - nx + s + 1)*(ofill[j+1] - ofill[j]));
1221       cols[cnt]  = nc + (s + nx - i - 1)*(ofill[j+1] - ofill[j]);
1222       cnt++;
1223     }
1224   }
1225 
1226   ierr = MatSeqAIJSetPreallocation(J,0,cols);CHKERRQ(ierr);
1227   ierr = MatMPIAIJSetPreallocation(J,0,cols,0,ocols);CHKERRQ(ierr);
1228   ierr = PetscFree2(cols,ocols);CHKERRQ(ierr);
1229 
1230   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1231   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1232   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1233   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1234 
1235   /*
1236     For each node in the grid: we get the neighbors in the local (on processor ordering
1237     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1238     PETSc ordering.
1239   */
1240   if (!da->prealloc_only) {
1241     ierr = PetscMalloc(col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1242     ierr = PetscMalloc(col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1243     ierr = PetscMemzero(values,col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1244 
1245     row  = xs*nc;
1246     /* coupling with process to the left */
1247     for (i=xs; i<xs+s; i++) {
1248       for (j=0; j<nc; j++) {
1249         cnt = 0;
1250         if (rank) {
1251           for (l=0; l<s; l++) {
1252             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1253           }
1254         }
1255         for (k=0; k<nc; k++) {
1256           cols[cnt++] = i*nc + k;
1257         }
1258         for (l=0; l<s; l++) {
1259           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1260         }
1261         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1262         row++;
1263       }
1264     }
1265     for (i=xs+s; i<xs+nx-s; i++) {
1266       for (j=0; j<nc; j++) {
1267         cnt = 0;
1268         for (l=0; l<s; l++) {
1269           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1270         }
1271         for (k=0; k<nc; k++) {
1272           cols[cnt++] = i*nc + k;
1273         }
1274         for (l=0; l<s; l++) {
1275           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1276         }
1277         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1278         row++;
1279       }
1280     }
1281     /* coupling with process to the right */
1282     for (i=xs+nx-s; i<xs+nx; i++) {
1283       for (j=0; j<nc; j++) {
1284         cnt = 0;
1285         for (l=0; l<s; l++) {
1286           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1287         }
1288         for (k=0; k<nc; k++) {
1289           cols[cnt++] = i*nc + k;
1290         }
1291         if (rank < size-1) {
1292           for (l=0; l<s; l++) {
1293             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1294           }
1295         }
1296         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1297         row++;
1298       }
1299     }
1300     ierr = PetscFree(values);CHKERRQ(ierr);
1301     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1302     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1303     ierr = PetscFree(cols);CHKERRQ(ierr);
1304   }
1305   PetscFunctionReturn(0);
1306 }
1307 
1308 /* ---------------------------------------------------------------------------------*/
1309 
1310 #undef __FUNCT__
1311 #define __FUNCT__ "DMCreateMatrix_DA_1d_MPIAIJ"
1312 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J)
1313 {
1314   PetscErrorCode         ierr;
1315   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1316   PetscInt               m,dim,s,*cols = PETSC_NULL,nc,*rows = PETSC_NULL,col,cnt,l;
1317   PetscInt               istart,iend;
1318   PetscScalar            *values;
1319   DMDABoundaryType       bx;
1320   ISLocalToGlobalMapping ltog,ltogb;
1321 
1322   PetscFunctionBegin;
1323   /*
1324          nc - number of components per grid point
1325          col - number of colors needed in one direction for single component problem
1326 
1327   */
1328   ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
1329   col    = 2*s + 1;
1330 
1331   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
1332   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
1333 
1334   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1335   ierr = MatSeqAIJSetPreallocation(J,col*nc,0);CHKERRQ(ierr);
1336   ierr = MatMPIAIJSetPreallocation(J,col*nc,0,col*nc,0);CHKERRQ(ierr);
1337 
1338   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1339   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1340   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1341   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1342 
1343   /*
1344     For each node in the grid: we get the neighbors in the local (on processor ordering
1345     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1346     PETSc ordering.
1347   */
1348   if (!da->prealloc_only) {
1349     ierr = PetscMalloc2(nc,PetscInt,&rows,col*nc*nc,PetscInt,&cols);CHKERRQ(ierr);
1350     ierr = PetscMalloc(col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1351     ierr = PetscMemzero(values,col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1352     for (i=xs; i<xs+nx; i++) {
1353       istart = PetscMax(-s,gxs - i);
1354       iend   = PetscMin(s,gxs + gnx - i - 1);
1355       slot   = i - gxs;
1356 
1357       cnt  = 0;
1358       for (l=0; l<nc; l++) {
1359         for (i1=istart; i1<iend+1; i1++) {
1360           cols[cnt++] = l + nc*(slot + i1);
1361         }
1362         rows[l]      = l + nc*(slot);
1363       }
1364       ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1365     }
1366     ierr = PetscFree(values);CHKERRQ(ierr);
1367     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1368     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1369     ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1370   }
1371   PetscFunctionReturn(0);
1372 }
1373 
1374 #undef __FUNCT__
1375 #define __FUNCT__ "DMCreateMatrix_DA_2d_MPIBAIJ"
1376 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
1377 {
1378   PetscErrorCode         ierr;
1379   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1380   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1381   PetscInt               istart,iend,jstart,jend,ii,jj;
1382   MPI_Comm               comm;
1383   PetscScalar            *values;
1384   DMDABoundaryType       bx,by;
1385   DMDAStencilType        st;
1386   ISLocalToGlobalMapping ltog,ltogb;
1387 
1388   PetscFunctionBegin;
1389   /*
1390      nc - number of components per grid point
1391      col - number of colors needed in one direction for single component problem
1392   */
1393   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1394   col = 2*s + 1;
1395 
1396   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1397   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1398   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1399 
1400   ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1401 
1402   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1403   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1404 
1405   /* determine the matrix preallocation information */
1406   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1407   for (i=xs; i<xs+nx; i++) {
1408     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1409     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1410     for (j=ys; j<ys+ny; j++) {
1411       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1412       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1413       slot = i - gxs + gnx*(j - gys);
1414 
1415       /* Find block columns in block row */
1416       cnt  = 0;
1417       for (ii=istart; ii<iend+1; ii++) {
1418         for (jj=jstart; jj<jend+1; jj++) {
1419           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1420             cols[cnt++]  = slot + ii + gnx*jj;
1421           }
1422         }
1423       }
1424       ierr = MatPreallocateSetLocal(ltogb,1,&slot,ltogb,cnt,cols,dnz,onz);CHKERRQ(ierr);
1425     }
1426   }
1427   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1428   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1429   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1430 
1431   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1432   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1433 
1434   /*
1435     For each node in the grid: we get the neighbors in the local (on processor ordering
1436     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1437     PETSc ordering.
1438   */
1439   if (!da->prealloc_only) {
1440     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1441     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1442     for (i=xs; i<xs+nx; i++) {
1443       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1444       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1445       for (j=ys; j<ys+ny; j++) {
1446         jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1447         jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1448         slot = i - gxs + gnx*(j - gys);
1449         cnt  = 0;
1450         for (ii=istart; ii<iend+1; ii++) {
1451           for (jj=jstart; jj<jend+1; jj++) {
1452             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1453               cols[cnt++]  = slot + ii + gnx*jj;
1454             }
1455           }
1456         }
1457         ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1458       }
1459     }
1460     ierr = PetscFree(values);CHKERRQ(ierr);
1461     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1462     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1463   }
1464   ierr = PetscFree(cols);CHKERRQ(ierr);
1465   PetscFunctionReturn(0);
1466 }
1467 
1468 #undef __FUNCT__
1469 #define __FUNCT__ "DMCreateMatrix_DA_3d_MPIBAIJ"
1470 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
1471 {
1472   PetscErrorCode         ierr;
1473   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1474   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1475   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1476   MPI_Comm               comm;
1477   PetscScalar            *values;
1478   DMDABoundaryType       bx,by,bz;
1479   DMDAStencilType        st;
1480   ISLocalToGlobalMapping ltog,ltogb;
1481 
1482   PetscFunctionBegin;
1483   /*
1484          nc - number of components per grid point
1485          col - number of colors needed in one direction for single component problem
1486 
1487   */
1488   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1489   col    = 2*s + 1;
1490 
1491   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1492   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1493   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1494 
1495   ierr  = PetscMalloc(col*col*col*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1496 
1497   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1498   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1499 
1500   /* determine the matrix preallocation information */
1501   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1502   for (i=xs; i<xs+nx; i++) {
1503     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1504     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1505     for (j=ys; j<ys+ny; j++) {
1506       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1507       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1508       for (k=zs; k<zs+nz; k++) {
1509         kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1510         kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1511 
1512         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1513 
1514         /* Find block columns in block row */
1515         cnt  = 0;
1516         for (ii=istart; ii<iend+1; ii++) {
1517           for (jj=jstart; jj<jend+1; jj++) {
1518             for (kk=kstart; kk<kend+1; kk++) {
1519               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1520                 cols[cnt++]  = slot + ii + gnx*jj + gnx*gny*kk;
1521               }
1522             }
1523           }
1524         }
1525         ierr = MatPreallocateSetLocal(ltogb,1,&slot,ltogb,cnt,cols,dnz,onz);CHKERRQ(ierr);
1526       }
1527     }
1528   }
1529   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1530   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1531   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1532 
1533   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1534   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1535 
1536   /*
1537     For each node in the grid: we get the neighbors in the local (on processor ordering
1538     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1539     PETSc ordering.
1540   */
1541   if (!da->prealloc_only) {
1542     ierr  = PetscMalloc(col*col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1543     ierr  = PetscMemzero(values,col*col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1544     for (i=xs; i<xs+nx; i++) {
1545       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1546       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1547       for (j=ys; j<ys+ny; j++) {
1548         jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1549         jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1550         for (k=zs; k<zs+nz; k++) {
1551           kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1552           kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1553 
1554           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1555 
1556           cnt  = 0;
1557           for (ii=istart; ii<iend+1; ii++) {
1558             for (jj=jstart; jj<jend+1; jj++) {
1559               for (kk=kstart; kk<kend+1; kk++) {
1560                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1561                   cols[cnt++]  = slot + ii + gnx*jj + gnx*gny*kk;
1562                 }
1563               }
1564             }
1565           }
1566           ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1567         }
1568       }
1569     }
1570     ierr = PetscFree(values);CHKERRQ(ierr);
1571     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1572     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1573   }
1574   ierr = PetscFree(cols);CHKERRQ(ierr);
1575   PetscFunctionReturn(0);
1576 }
1577 
1578 #undef __FUNCT__
1579 #define __FUNCT__ "L2GFilterUpperTriangular"
1580 /*
1581   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
1582   identify in the local ordering with periodic domain.
1583 */
1584 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[])
1585 {
1586   PetscErrorCode ierr;
1587   PetscInt       i,n;
1588 
1589   PetscFunctionBegin;
1590   ierr = ISLocalToGlobalMappingApply(ltog,1,row,row);CHKERRQ(ierr);
1591   ierr = ISLocalToGlobalMappingApply(ltog,*cnt,col,col);CHKERRQ(ierr);
1592   for (i=0,n=0; i<*cnt; i++) {
1593     if (col[i] >= *row) col[n++] = col[i];
1594   }
1595   *cnt = n;
1596   PetscFunctionReturn(0);
1597 }
1598 
1599 #undef __FUNCT__
1600 #define __FUNCT__ "DMCreateMatrix_DA_2d_MPISBAIJ"
1601 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J)
1602 {
1603   PetscErrorCode         ierr;
1604   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1605   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1606   PetscInt               istart,iend,jstart,jend,ii,jj;
1607   MPI_Comm               comm;
1608   PetscScalar            *values;
1609   DMDABoundaryType       bx,by;
1610   DMDAStencilType        st;
1611   ISLocalToGlobalMapping ltog,ltogb;
1612 
1613   PetscFunctionBegin;
1614   /*
1615      nc - number of components per grid point
1616      col - number of colors needed in one direction for single component problem
1617   */
1618   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1619   col = 2*s + 1;
1620 
1621   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1622   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1623   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1624 
1625   ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1626 
1627   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1628   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1629 
1630   /* determine the matrix preallocation information */
1631   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1632   for (i=xs; i<xs+nx; i++) {
1633     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1634     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1635     for (j=ys; j<ys+ny; j++) {
1636       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1637       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1638       slot = i - gxs + gnx*(j - gys);
1639 
1640       /* Find block columns in block row */
1641       cnt  = 0;
1642       for (ii=istart; ii<iend+1; ii++) {
1643         for (jj=jstart; jj<jend+1; jj++) {
1644           if (st == DMDA_STENCIL_BOX || !ii || !jj) {
1645             cols[cnt++]  = slot + ii + gnx*jj;
1646           }
1647         }
1648       }
1649       ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1650       ierr = MatPreallocateSymmetricSet(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
1651     }
1652   }
1653   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1654   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1655   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1656 
1657   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1658   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1659 
1660   /*
1661     For each node in the grid: we get the neighbors in the local (on processor ordering
1662     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1663     PETSc ordering.
1664   */
1665   if (!da->prealloc_only) {
1666     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1667     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1668     for (i=xs; i<xs+nx; i++) {
1669       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1670       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1671       for (j=ys; j<ys+ny; j++) {
1672         jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1673         jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1674         slot = i - gxs + gnx*(j - gys);
1675 
1676         /* Find block columns in block row */
1677         cnt  = 0;
1678         for (ii=istart; ii<iend+1; ii++) {
1679           for (jj=jstart; jj<jend+1; jj++) {
1680             if (st == DMDA_STENCIL_BOX || !ii || !jj) {
1681               cols[cnt++]  = slot + ii + gnx*jj;
1682             }
1683           }
1684         }
1685         ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1686         ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1687       }
1688     }
1689     ierr = PetscFree(values);CHKERRQ(ierr);
1690     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1691     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1692   }
1693   ierr = PetscFree(cols);CHKERRQ(ierr);
1694   PetscFunctionReturn(0);
1695 }
1696 
1697 #undef __FUNCT__
1698 #define __FUNCT__ "DMCreateMatrix_DA_3d_MPISBAIJ"
1699 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J)
1700 {
1701   PetscErrorCode         ierr;
1702   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1703   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1704   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1705   MPI_Comm               comm;
1706   PetscScalar            *values;
1707   DMDABoundaryType       bx,by,bz;
1708   DMDAStencilType        st;
1709   ISLocalToGlobalMapping ltog,ltogb;
1710 
1711   PetscFunctionBegin;
1712   /*
1713      nc - number of components per grid point
1714      col - number of colors needed in one direction for single component problem
1715   */
1716   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1717   col = 2*s + 1;
1718 
1719   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1720   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1721   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1722 
1723   /* create the matrix */
1724   ierr = PetscMalloc(col*col*col*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1725 
1726   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1727   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1728 
1729   /* determine the matrix preallocation information */
1730   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1731   for (i=xs; i<xs+nx; i++) {
1732     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1733     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1734     for (j=ys; j<ys+ny; j++) {
1735       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1736       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1737       for (k=zs; k<zs+nz; k++) {
1738         kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1739         kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1740 
1741         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1742 
1743         /* Find block columns in block row */
1744         cnt  = 0;
1745         for (ii=istart; ii<iend+1; ii++) {
1746           for (jj=jstart; jj<jend+1; jj++) {
1747             for (kk=kstart; kk<kend+1; kk++) {
1748               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
1749                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1750               }
1751             }
1752           }
1753         }
1754         ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1755         ierr = MatPreallocateSymmetricSet(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
1756       }
1757     }
1758   }
1759   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1760   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1761   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1762 
1763   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1764   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1765 
1766   /*
1767     For each node in the grid: we get the neighbors in the local (on processor ordering
1768     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1769     PETSc ordering.
1770   */
1771   if (!da->prealloc_only) {
1772     ierr = PetscMalloc(col*col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1773     ierr = PetscMemzero(values,col*col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1774     for (i=xs; i<xs+nx; i++) {
1775       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1776       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1777       for (j=ys; j<ys+ny; j++) {
1778         jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1779         jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1780         for (k=zs; k<zs+nz; k++) {
1781           kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1782           kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1783 
1784           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1785 
1786           cnt  = 0;
1787           for (ii=istart; ii<iend+1; ii++) {
1788             for (jj=jstart; jj<jend+1; jj++) {
1789               for (kk=kstart; kk<kend+1; kk++) {
1790                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
1791                   cols[cnt++]  = slot + ii + gnx*jj + gnx*gny*kk;
1792                 }
1793               }
1794             }
1795           }
1796           ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1797           ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1798         }
1799       }
1800     }
1801     ierr = PetscFree(values);CHKERRQ(ierr);
1802     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1803     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1804   }
1805   ierr = PetscFree(cols);CHKERRQ(ierr);
1806   PetscFunctionReturn(0);
1807 }
1808 
1809 /* ---------------------------------------------------------------------------------*/
1810 
1811 #undef __FUNCT__
1812 #define __FUNCT__ "DMCreateMatrix_DA_3d_MPIAIJ_Fill"
1813 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J)
1814 {
1815   PetscErrorCode         ierr;
1816   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1817   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,l,p,*dnz,*onz;
1818   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1819   DM_DA                  *dd = (DM_DA*)da->data;
1820   PetscInt               ifill_col,*dfill = dd->dfill,*ofill = dd->ofill;
1821   MPI_Comm               comm;
1822   PetscScalar            *values;
1823   DMDABoundaryType       bx,by,bz;
1824   ISLocalToGlobalMapping ltog,ltogb;
1825   DMDAStencilType        st;
1826 
1827   PetscFunctionBegin;
1828   /*
1829          nc - number of components per grid point
1830          col - number of colors needed in one direction for single component problem
1831 
1832   */
1833   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1834   col    = 2*s + 1;
1835   if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)) SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
1836                  by 2*stencil_width + 1\n");
1837   if (by == DMDA_BOUNDARY_PERIODIC && (n % col)) SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
1838                  by 2*stencil_width + 1\n");
1839   if (bz == DMDA_BOUNDARY_PERIODIC && (p % col)) SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
1840                  by 2*stencil_width + 1\n");
1841 
1842   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1843   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1844   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1845 
1846   ierr = PetscMalloc(col*col*col*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1847   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1848   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1849 
1850   /* determine the matrix preallocation information */
1851   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1852 
1853 
1854   for (i=xs; i<xs+nx; i++) {
1855     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1856     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1857     for (j=ys; j<ys+ny; j++) {
1858       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1859       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1860       for (k=zs; k<zs+nz; k++) {
1861         kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1862         kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1863 
1864         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1865 
1866         for (l=0; l<nc; l++) {
1867           cnt  = 0;
1868           for (ii=istart; ii<iend+1; ii++) {
1869             for (jj=jstart; jj<jend+1; jj++) {
1870               for (kk=kstart; kk<kend+1; kk++) {
1871                 if (ii || jj || kk) {
1872                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1873                     for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++)
1874                       cols[cnt++]  = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1875                   }
1876                 } else {
1877                   if (dfill) {
1878                     for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++)
1879                       cols[cnt++]  = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1880                   } else {
1881                     for (ifill_col=0; ifill_col<nc; ifill_col++)
1882                       cols[cnt++]  = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1883                   }
1884                 }
1885               }
1886             }
1887           }
1888           row  = l + nc*(slot);
1889           ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1890         }
1891       }
1892     }
1893   }
1894   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1895   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1896   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1897   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1898   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1899 
1900   /*
1901     For each node in the grid: we get the neighbors in the local (on processor ordering
1902     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1903     PETSc ordering.
1904   */
1905   if (!da->prealloc_only) {
1906     ierr = PetscMalloc(col*col*col*nc*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1907     ierr = PetscMemzero(values,col*col*col*nc*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1908     for (i=xs; i<xs+nx; i++) {
1909       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1910       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1911       for (j=ys; j<ys+ny; j++) {
1912         jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1913         jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1914         for (k=zs; k<zs+nz; k++) {
1915           kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1916           kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1917 
1918           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1919 
1920           for (l=0; l<nc; l++) {
1921             cnt  = 0;
1922             for (ii=istart; ii<iend+1; ii++) {
1923               for (jj=jstart; jj<jend+1; jj++) {
1924                 for (kk=kstart; kk<kend+1; kk++) {
1925                   if (ii || jj || kk) {
1926                     if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1927                       for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++)
1928                         cols[cnt++]  = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1929                     }
1930                   } else {
1931                     if (dfill) {
1932                       for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++)
1933                         cols[cnt++]  = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1934                     } else {
1935                       for (ifill_col=0; ifill_col<nc; ifill_col++)
1936                         cols[cnt++]  = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1937                     }
1938                   }
1939                 }
1940               }
1941             }
1942             row  = l + nc*(slot);
1943             ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1944           }
1945         }
1946       }
1947     }
1948     ierr = PetscFree(values);CHKERRQ(ierr);
1949     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1950     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1951   }
1952   ierr = PetscFree(cols);CHKERRQ(ierr);
1953   PetscFunctionReturn(0);
1954 }
1955