xref: /petsc/src/dm/impls/da/fdda.c (revision 554c65c06a6134e637c3683ef491cf6a9ea5592a)
1 
2 #include <petsc-private/dmdaimpl.h> /*I      "petscdmda.h"     I*/
3 #include <petscmat.h>
4 #include <petsc-private/matimpl.h>
5 
6 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring*);
7 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring*);
8 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring*);
9 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring*);
10 
11 /*
12    For ghost i that may be negative or greater than the upper bound this
13   maps it into the 0:m-1 range using periodicity
14 */
15 #define SetInRange(i,m) ((i < 0) ? m+i : ((i >= m) ? i-m : i))
16 
17 #undef __FUNCT__
18 #define __FUNCT__ "DMDASetBlockFills_Private"
19 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill,PetscInt w,PetscInt **rfill)
20 {
21   PetscErrorCode ierr;
22   PetscInt       i,j,nz,*fill;
23 
24   PetscFunctionBegin;
25   if (!dfill) PetscFunctionReturn(0);
26 
27   /* count number nonzeros */
28   nz = 0;
29   for (i=0; i<w; i++) {
30     for (j=0; j<w; j++) {
31       if (dfill[w*i+j]) nz++;
32     }
33   }
34   ierr = PetscMalloc((nz + w + 1)*sizeof(PetscInt),&fill);CHKERRQ(ierr);
35   /* construct modified CSR storage of nonzero structure */
36   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
37    so fill[1] - fill[0] gives number of nonzeros in first row etc */
38   nz = w + 1;
39   for (i=0; i<w; i++) {
40     fill[i] = nz;
41     for (j=0; j<w; j++) {
42       if (dfill[w*i+j]) {
43         fill[nz] = j;
44         nz++;
45       }
46     }
47   }
48   fill[w] = nz;
49 
50   *rfill = fill;
51   PetscFunctionReturn(0);
52 }
53 
54 #undef __FUNCT__
55 #define __FUNCT__ "DMDASetBlockFills"
56 /*@
57     DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
58     of the matrix returned by DMCreateMatrix().
59 
60     Logically Collective on DMDA
61 
62     Input Parameter:
63 +   da - the distributed array
64 .   dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
65 -   ofill - the fill pattern in the off-diagonal blocks
66 
67 
68     Level: developer
69 
70     Notes: This only makes sense when you are doing multicomponent problems but using the
71        MPIAIJ matrix format
72 
73            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
74        representing coupling and 0 entries for missing coupling. For example
75 $             dfill[9] = {1, 0, 0,
76 $                         1, 1, 0,
77 $                         0, 1, 1}
78        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
79        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
80        diagonal block).
81 
82      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
83      can be represented in the dfill, ofill format
84 
85    Contributed by Glenn Hammond
86 
87 .seealso DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()
88 
89 @*/
90 PetscErrorCode  DMDASetBlockFills(DM da,const PetscInt *dfill,const PetscInt *ofill)
91 {
92   DM_DA          *dd = (DM_DA*)da->data;
93   PetscErrorCode ierr;
94   PetscInt       i,k,cnt = 1;
95 
96   PetscFunctionBegin;
97   ierr = DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);CHKERRQ(ierr);
98   ierr = DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);CHKERRQ(ierr);
99 
100   /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of
101    columns to the left with any nonzeros in them plus 1 */
102   ierr = PetscMalloc(dd->w*sizeof(PetscBool),&dd->ofillcols);CHKERRQ(ierr);
103   ierr = PetscMemzero(dd->ofillcols,dd->w*sizeof(PetscBool));CHKERRQ(ierr);
104   for (i=0; i<dd->w; i++) {
105     for (k=dd->ofill[i]; k<dd->ofill[i+1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
106   }
107   for (i=0; i<dd->w; i++) {
108     if (dd->ofillcols[i]) {
109       dd->ofillcols[i] = cnt++;
110     }
111   }
112   PetscFunctionReturn(0);
113 }
114 
115 
116 #undef __FUNCT__
117 #define __FUNCT__ "DMCreateColoring_DA"
118 PetscErrorCode  DMCreateColoring_DA(DM da,ISColoringType ctype,MatType mtype,ISColoring *coloring)
119 {
120   PetscErrorCode   ierr;
121   PetscInt         dim,m,n,p,nc;
122   DMDABoundaryType bx,by,bz;
123   MPI_Comm         comm;
124   PetscMPIInt      size;
125   PetscBool        isBAIJ;
126   DM_DA            *dd = (DM_DA*)da->data;
127 
128   PetscFunctionBegin;
129   /*
130                                   m
131           ------------------------------------------------------
132          |                                                     |
133          |                                                     |
134          |               ----------------------                |
135          |               |                    |                |
136       n  |           yn  |                    |                |
137          |               |                    |                |
138          |               .---------------------                |
139          |             (xs,ys)     xn                          |
140          |            .                                        |
141          |         (gxs,gys)                                   |
142          |                                                     |
143           -----------------------------------------------------
144   */
145 
146   /*
147          nc - number of components per grid point
148          col - number of colors needed in one direction for single component problem
149 
150   */
151   ierr = DMDAGetInfo(da,&dim,0,0,0,&m,&n,&p,&nc,0,&bx,&by,&bz,0);CHKERRQ(ierr);
152 
153   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
154   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
155   if (ctype == IS_COLORING_GHOSTED) {
156     if (size == 1) {
157       ctype = IS_COLORING_GLOBAL;
158     } else if (dim > 1) {
159       if ((m==1 && bx == DMDA_BOUNDARY_PERIODIC) || (n==1 && by == DMDA_BOUNDARY_PERIODIC) || (p==1 && bz == DMDA_BOUNDARY_PERIODIC)) {
160         SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"IS_COLORING_GHOSTED cannot be used for periodic boundary condition having both ends of the domain  on the same process");
161       }
162     }
163   }
164 
165   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
166      matrices is for the blocks, not the individual matrix elements  */
167   ierr = PetscStrcmp(mtype,MATBAIJ,&isBAIJ);CHKERRQ(ierr);
168   if (!isBAIJ) {ierr = PetscStrcmp(mtype,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);}
169   if (!isBAIJ) {ierr = PetscStrcmp(mtype,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);}
170   if (isBAIJ) {
171     dd->w  = 1;
172     dd->xs = dd->xs/nc;
173     dd->xe = dd->xe/nc;
174     dd->Xs = dd->Xs/nc;
175     dd->Xe = dd->Xe/nc;
176   }
177 
178   /*
179      We do not provide a getcoloring function in the DMDA operations because
180    the basic DMDA does not know about matrices. We think of DMDA as being more
181    more low-level then matrices.
182   */
183   if (dim == 1) {
184     ierr = DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
185   } else if (dim == 2) {
186     ierr =  DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
187   } else if (dim == 3) {
188     ierr =  DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
189   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
190   if (isBAIJ) {
191     dd->w  = nc;
192     dd->xs = dd->xs*nc;
193     dd->xe = dd->xe*nc;
194     dd->Xs = dd->Xs*nc;
195     dd->Xe = dd->Xe*nc;
196   }
197   PetscFunctionReturn(0);
198 }
199 
200 /* ---------------------------------------------------------------------------------*/
201 
202 #undef __FUNCT__
203 #define __FUNCT__ "DMCreateColoring_DA_2d_MPIAIJ"
204 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
205 {
206   PetscErrorCode   ierr;
207   PetscInt         xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
208   PetscInt         ncolors;
209   MPI_Comm         comm;
210   DMDABoundaryType bx,by;
211   DMDAStencilType  st;
212   ISColoringValue  *colors;
213   DM_DA            *dd = (DM_DA*)da->data;
214 
215   PetscFunctionBegin;
216   /*
217          nc - number of components per grid point
218          col - number of colors needed in one direction for single component problem
219 
220   */
221   ierr = DMDAGetInfo(da,&dim,&m,&n,0,&M,&N,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
222   col  = 2*s + 1;
223   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
224   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
225   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
226 
227   /* special case as taught to us by Paul Hovland */
228   if (st == DMDA_STENCIL_STAR && s == 1) {
229     ierr = DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
230   } else {
231 
232     if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X (%d) is divisible\n\
233                                                             by 2*stencil_width + 1 (%d)\n", m, col);
234     if (by == DMDA_BOUNDARY_PERIODIC && (n % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y (%d) is divisible\n\
235                                                             by 2*stencil_width + 1 (%d)\n", n, col);
236     if (ctype == IS_COLORING_GLOBAL) {
237       if (!dd->localcoloring) {
238         ierr = PetscMalloc(nc*nx*ny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
239         ii   = 0;
240         for (j=ys; j<ys+ny; j++) {
241           for (i=xs; i<xs+nx; i++) {
242             for (k=0; k<nc; k++) {
243               colors[ii++] = k + nc*((i % col) + col*(j % col));
244             }
245           }
246         }
247         ncolors = nc + nc*(col-1 + col*(col-1));
248         ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,&dd->localcoloring);CHKERRQ(ierr);
249       }
250       *coloring = dd->localcoloring;
251     } else if (ctype == IS_COLORING_GHOSTED) {
252       if (!dd->ghostedcoloring) {
253         ierr = PetscMalloc(nc*gnx*gny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
254         ii   = 0;
255         for (j=gys; j<gys+gny; j++) {
256           for (i=gxs; i<gxs+gnx; i++) {
257             for (k=0; k<nc; k++) {
258               /* the complicated stuff is to handle periodic boundaries */
259               colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
260             }
261           }
262         }
263         ncolors = nc + nc*(col - 1 + col*(col-1));
264         ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
265         /* PetscIntView(ncolors,(PetscInt*)colors,0); */
266 
267         ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
268       }
269       *coloring = dd->ghostedcoloring;
270     } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
271   }
272   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
273   PetscFunctionReturn(0);
274 }
275 
276 /* ---------------------------------------------------------------------------------*/
277 
278 #undef __FUNCT__
279 #define __FUNCT__ "DMCreateColoring_DA_3d_MPIAIJ"
280 PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
281 {
282   PetscErrorCode   ierr;
283   PetscInt         xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
284   PetscInt         ncolors;
285   MPI_Comm         comm;
286   DMDABoundaryType bx,by,bz;
287   DMDAStencilType  st;
288   ISColoringValue  *colors;
289   DM_DA            *dd = (DM_DA*)da->data;
290 
291   PetscFunctionBegin;
292   /*
293          nc - number of components per grid point
294          col - number of colors needed in one direction for single component problem
295 
296   */
297   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
298   col  = 2*s + 1;
299   if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
300                                                          by 2*stencil_width + 1\n");
301   if (by == DMDA_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
302                                                          by 2*stencil_width + 1\n");
303   if (bz == DMDA_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
304                                                          by 2*stencil_width + 1\n");
305 
306   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
307   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
308   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
309 
310   /* create the coloring */
311   if (ctype == IS_COLORING_GLOBAL) {
312     if (!dd->localcoloring) {
313       ierr = PetscMalloc(nc*nx*ny*nz*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
314       ii   = 0;
315       for (k=zs; k<zs+nz; k++) {
316         for (j=ys; j<ys+ny; j++) {
317           for (i=xs; i<xs+nx; i++) {
318             for (l=0; l<nc; l++) {
319               colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
320             }
321           }
322         }
323       }
324       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
325       ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,&dd->localcoloring);CHKERRQ(ierr);
326     }
327     *coloring = dd->localcoloring;
328   } else if (ctype == IS_COLORING_GHOSTED) {
329     if (!dd->ghostedcoloring) {
330       ierr = PetscMalloc(nc*gnx*gny*gnz*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
331       ii   = 0;
332       for (k=gzs; k<gzs+gnz; k++) {
333         for (j=gys; j<gys+gny; j++) {
334           for (i=gxs; i<gxs+gnx; i++) {
335             for (l=0; l<nc; l++) {
336               /* the complicated stuff is to handle periodic boundaries */
337               colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
338             }
339           }
340         }
341       }
342       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
343       ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
344       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
345     }
346     *coloring = dd->ghostedcoloring;
347   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
348   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
349   PetscFunctionReturn(0);
350 }
351 
352 /* ---------------------------------------------------------------------------------*/
353 
354 #undef __FUNCT__
355 #define __FUNCT__ "DMCreateColoring_DA_1d_MPIAIJ"
356 PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
357 {
358   PetscErrorCode   ierr;
359   PetscInt         xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
360   PetscInt         ncolors;
361   MPI_Comm         comm;
362   DMDABoundaryType bx;
363   ISColoringValue  *colors;
364   DM_DA            *dd = (DM_DA*)da->data;
365 
366   PetscFunctionBegin;
367   /*
368          nc - number of components per grid point
369          col - number of colors needed in one direction for single component problem
370 
371   */
372   ierr = DMDAGetInfo(da,&dim,&m,0,0,&M,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
373   col  = 2*s + 1;
374 
375   if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points %d is divisible\n\
376                                                           by 2*stencil_width + 1 %d\n",(int)m,(int)col);
377 
378   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
379   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
380   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
381 
382   /* create the coloring */
383   if (ctype == IS_COLORING_GLOBAL) {
384     if (!dd->localcoloring) {
385       ierr = PetscMalloc(nc*nx*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
386       if (dd->ofillcols) {
387         PetscInt tc = 0;
388         for (i=0; i<nc; i++) tc += (PetscInt) (dd->ofillcols[i] > 0);
389         i1 = 0;
390         for (i=xs; i<xs+nx; i++) {
391           for (l=0; l<nc; l++) {
392             if (dd->ofillcols[l] && (i % col)) {
393               colors[i1++] =  nc - 1 + tc*((i % col) - 1) + dd->ofillcols[l];
394             } else {
395               colors[i1++] = l;
396             }
397           }
398         }
399         ncolors = nc + 2*s*tc;
400       } else {
401         i1 = 0;
402         for (i=xs; i<xs+nx; i++) {
403           for (l=0; l<nc; l++) {
404             colors[i1++] = l + nc*(i % col);
405           }
406         }
407         ncolors = nc + nc*(col-1);
408       }
409       ierr = ISColoringCreate(comm,ncolors,nc*nx,colors,&dd->localcoloring);CHKERRQ(ierr);
410     }
411     *coloring = dd->localcoloring;
412   } else if (ctype == IS_COLORING_GHOSTED) {
413     if (!dd->ghostedcoloring) {
414       ierr = PetscMalloc(nc*gnx*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
415       i1   = 0;
416       for (i=gxs; i<gxs+gnx; i++) {
417         for (l=0; l<nc; l++) {
418           /* the complicated stuff is to handle periodic boundaries */
419           colors[i1++] = l + nc*(SetInRange(i,m) % col);
420         }
421       }
422       ncolors = nc + nc*(col-1);
423       ierr    = ISColoringCreate(comm,ncolors,nc*gnx,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
424       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
425     }
426     *coloring = dd->ghostedcoloring;
427   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
428   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
429   PetscFunctionReturn(0);
430 }
431 
432 #undef __FUNCT__
433 #define __FUNCT__ "DMCreateColoring_DA_2d_5pt_MPIAIJ"
434 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
435 {
436   PetscErrorCode   ierr;
437   PetscInt         xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
438   PetscInt         ncolors;
439   MPI_Comm         comm;
440   DMDABoundaryType bx,by;
441   ISColoringValue  *colors;
442   DM_DA            *dd = (DM_DA*)da->data;
443 
444   PetscFunctionBegin;
445   /*
446          nc - number of components per grid point
447          col - number of colors needed in one direction for single component problem
448 
449   */
450   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,0);CHKERRQ(ierr);
451   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
452   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
453   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
454 
455   if (bx == DMDA_BOUNDARY_PERIODIC && (m % 5)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible by 5\n");
456   if (by == DMDA_BOUNDARY_PERIODIC && (n % 5)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible by 5\n");
457 
458   /* create the coloring */
459   if (ctype == IS_COLORING_GLOBAL) {
460     if (!dd->localcoloring) {
461       ierr = PetscMalloc(nc*nx*ny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
462       ii   = 0;
463       for (j=ys; j<ys+ny; j++) {
464         for (i=xs; i<xs+nx; i++) {
465           for (k=0; k<nc; k++) {
466             colors[ii++] = k + nc*((3*j+i) % 5);
467           }
468         }
469       }
470       ncolors = 5*nc;
471       ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,&dd->localcoloring);CHKERRQ(ierr);
472     }
473     *coloring = dd->localcoloring;
474   } else if (ctype == IS_COLORING_GHOSTED) {
475     if (!dd->ghostedcoloring) {
476       ierr = PetscMalloc(nc*gnx*gny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
477       ii = 0;
478       for (j=gys; j<gys+gny; j++) {
479         for (i=gxs; i<gxs+gnx; i++) {
480           for (k=0; k<nc; k++) {
481             colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
482           }
483         }
484       }
485       ncolors = 5*nc;
486       ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
487       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
488     }
489     *coloring = dd->ghostedcoloring;
490   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
491   PetscFunctionReturn(0);
492 }
493 
494 /* =========================================================================== */
495 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat);
496 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM,Mat);
497 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat);
498 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat);
499 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat);
500 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat);
501 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat);
502 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat);
503 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat);
504 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat);
505 
506 #undef __FUNCT__
507 #define __FUNCT__ "MatSetupDM"
508 /*@C
509    MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix
510 
511    Logically Collective on Mat
512 
513    Input Parameters:
514 +  mat - the matrix
515 -  da - the da
516 
517    Level: intermediate
518 
519 @*/
520 PetscErrorCode MatSetupDM(Mat mat,DM da)
521 {
522   PetscErrorCode ierr;
523 
524   PetscFunctionBegin;
525   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
526   PetscValidHeaderSpecific(da,DM_CLASSID,1);
527   ierr = PetscTryMethod(mat,"MatSetupDM_C",(Mat,DM),(mat,da));CHKERRQ(ierr);
528   PetscFunctionReturn(0);
529 }
530 
531 #undef __FUNCT__
532 #define __FUNCT__ "MatView_MPI_DA"
533 PetscErrorCode  MatView_MPI_DA(Mat A,PetscViewer viewer)
534 {
535   DM                da;
536   PetscErrorCode    ierr;
537   const char        *prefix;
538   Mat               Anatural;
539   AO                ao;
540   PetscInt          rstart,rend,*petsc,i;
541   IS                is;
542   MPI_Comm          comm;
543   PetscViewerFormat format;
544 
545   PetscFunctionBegin;
546   /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
547   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
548   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0);
549 
550   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
551   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
552   if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
553 
554   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
555   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
556   ierr = PetscMalloc((rend-rstart)*sizeof(PetscInt),&petsc);CHKERRQ(ierr);
557   for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
558   ierr = AOApplicationToPetsc(ao,rend-rstart,petsc);CHKERRQ(ierr);
559   ierr = ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
560 
561   /* call viewer on natural ordering */
562   ierr = MatGetSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);CHKERRQ(ierr);
563   ierr = ISDestroy(&is);CHKERRQ(ierr);
564   ierr = PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);CHKERRQ(ierr);
565   ierr = PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);CHKERRQ(ierr);
566   ierr = PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);CHKERRQ(ierr);
567   ierr = MatView(Anatural,viewer);CHKERRQ(ierr);
568   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
569   PetscFunctionReturn(0);
570 }
571 
572 #undef __FUNCT__
573 #define __FUNCT__ "MatLoad_MPI_DA"
574 PetscErrorCode  MatLoad_MPI_DA(Mat A,PetscViewer viewer)
575 {
576   DM             da;
577   PetscErrorCode ierr;
578   Mat            Anatural,Aapp;
579   AO             ao;
580   PetscInt       rstart,rend,*app,i;
581   IS             is;
582   MPI_Comm       comm;
583 
584   PetscFunctionBegin;
585   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
586   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
587   if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
588 
589   /* Load the matrix in natural ordering */
590   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Anatural);CHKERRQ(ierr);
591   ierr = MatSetType(Anatural,((PetscObject)A)->type_name);CHKERRQ(ierr);
592   ierr = MatSetSizes(Anatural,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
593   ierr = MatLoad(Anatural,viewer);CHKERRQ(ierr);
594 
595   /* Map natural ordering to application ordering and create IS */
596   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
597   ierr = MatGetOwnershipRange(Anatural,&rstart,&rend);CHKERRQ(ierr);
598   ierr = PetscMalloc((rend-rstart)*sizeof(PetscInt),&app);CHKERRQ(ierr);
599   for (i=rstart; i<rend; i++) app[i-rstart] = i;
600   ierr = AOPetscToApplication(ao,rend-rstart,app);CHKERRQ(ierr);
601   ierr = ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
602 
603   /* Do permutation and replace header */
604   ierr = MatGetSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);CHKERRQ(ierr);
605   ierr = MatHeaderReplace(A,Aapp);CHKERRQ(ierr);
606   ierr = ISDestroy(&is);CHKERRQ(ierr);
607   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
608   PetscFunctionReturn(0);
609 }
610 
611 #undef __FUNCT__
612 #define __FUNCT__ "DMCreateMatrix_DA"
613 PetscErrorCode DMCreateMatrix_DA(DM da, MatType mtype,Mat *J)
614 {
615   PetscErrorCode ierr;
616   PetscInt       dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
617   Mat            A;
618   MPI_Comm       comm;
619   MatType        Atype;
620   PetscSection   section, sectionGlobal;
621   void           (*aij)(void)=NULL,(*baij)(void)=NULL,(*sbaij)(void)=NULL;
622   MatType        ttype[256];
623   PetscBool      flg;
624   PetscMPIInt    size;
625   DM_DA          *dd = (DM_DA*)da->data;
626 
627   PetscFunctionBegin;
628 #if !defined(PETSC_USE_DYNAMIC_LIBRARIES)
629   ierr = MatInitializePackage();CHKERRQ(ierr);
630 #endif
631   if (!mtype) mtype = MATAIJ;
632   ierr = PetscStrcpy((char*)ttype,mtype);CHKERRQ(ierr);
633   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)da),((PetscObject)da)->prefix,"DMDA options","Mat");CHKERRQ(ierr);
634   ierr = PetscOptionsList("-dm_mat_type","Matrix type","MatSetType",MatList,mtype,(char*)ttype,256,&flg);CHKERRQ(ierr);
635   ierr = PetscOptionsEnd();
636 
637   ierr = DMGetDefaultSection(da, &section);CHKERRQ(ierr);
638   if (section) {
639     PetscInt  bs = -1;
640     PetscInt  localSize;
641     PetscBool isShell, isBlock, isSeqBlock, isMPIBlock, isSymBlock, isSymSeqBlock, isSymMPIBlock, isSymmetric;
642 
643     ierr = DMGetDefaultGlobalSection(da, &sectionGlobal);CHKERRQ(ierr);
644     ierr = PetscSectionGetConstrainedStorageSize(sectionGlobal, &localSize);CHKERRQ(ierr);
645     ierr = MatCreate(PetscObjectComm((PetscObject)da), J);CHKERRQ(ierr);
646     ierr = MatSetSizes(*J, localSize, localSize, PETSC_DETERMINE, PETSC_DETERMINE);CHKERRQ(ierr);
647     ierr = MatSetType(*J, mtype);CHKERRQ(ierr);
648     ierr = MatSetFromOptions(*J);CHKERRQ(ierr);
649     ierr = PetscStrcmp(mtype, MATSHELL, &isShell);CHKERRQ(ierr);
650     ierr = PetscStrcmp(mtype, MATBAIJ, &isBlock);CHKERRQ(ierr);
651     ierr = PetscStrcmp(mtype, MATSEQBAIJ, &isSeqBlock);CHKERRQ(ierr);
652     ierr = PetscStrcmp(mtype, MATMPIBAIJ, &isMPIBlock);CHKERRQ(ierr);
653     ierr = PetscStrcmp(mtype, MATSBAIJ, &isSymBlock);CHKERRQ(ierr);
654     ierr = PetscStrcmp(mtype, MATSEQSBAIJ, &isSymSeqBlock);CHKERRQ(ierr);
655     ierr = PetscStrcmp(mtype, MATMPISBAIJ, &isSymMPIBlock);CHKERRQ(ierr);
656     /* Check for symmetric storage */
657     isSymmetric = (PetscBool) (isSymBlock || isSymSeqBlock || isSymMPIBlock);
658     if (isSymmetric) {
659       ierr = MatSetOption(*J, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE);CHKERRQ(ierr);
660     }
661     if (!isShell) {
662       PetscInt *dnz, *onz, *dnzu, *onzu, bsLocal;
663 
664       if (bs < 0) {
665         if (isBlock || isSeqBlock || isMPIBlock || isSymBlock || isSymSeqBlock || isSymMPIBlock) {
666           PetscInt pStart, pEnd, p, dof;
667 
668           ierr = PetscSectionGetChart(sectionGlobal, &pStart, &pEnd);CHKERRQ(ierr);
669           for (p = pStart; p < pEnd; ++p) {
670             ierr = PetscSectionGetDof(sectionGlobal, p, &dof);CHKERRQ(ierr);
671             if (dof) {
672               bs = dof;
673               break;
674             }
675           }
676         } else {
677           bs = 1;
678         }
679         /* Must have same blocksize on all procs (some might have no points) */
680         bsLocal = bs;
681         ierr    = MPI_Allreduce(&bsLocal, &bs, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)da));CHKERRQ(ierr);
682       }
683       ierr = PetscMalloc4(localSize/bs, PetscInt, &dnz, localSize/bs, PetscInt, &onz, localSize/bs, PetscInt, &dnzu, localSize/bs, PetscInt, &onzu);CHKERRQ(ierr);
684       ierr = PetscMemzero(dnz,  localSize/bs * sizeof(PetscInt));CHKERRQ(ierr);
685       ierr = PetscMemzero(onz,  localSize/bs * sizeof(PetscInt));CHKERRQ(ierr);
686       ierr = PetscMemzero(dnzu, localSize/bs * sizeof(PetscInt));CHKERRQ(ierr);
687       ierr = PetscMemzero(onzu, localSize/bs * sizeof(PetscInt));CHKERRQ(ierr);
688       /* ierr = DMPlexPreallocateOperator(dm, bs, section, sectionGlobal, dnz, onz, dnzu, onzu, *J, fillMatrix);CHKERRQ(ierr); */
689       ierr = PetscFree4(dnz, onz, dnzu, onzu);CHKERRQ(ierr);
690     }
691   }
692   /*
693                                   m
694           ------------------------------------------------------
695          |                                                     |
696          |                                                     |
697          |               ----------------------                |
698          |               |                    |                |
699       n  |           ny  |                    |                |
700          |               |                    |                |
701          |               .---------------------                |
702          |             (xs,ys)     nx                          |
703          |            .                                        |
704          |         (gxs,gys)                                   |
705          |                                                     |
706           -----------------------------------------------------
707   */
708 
709   /*
710          nc - number of components per grid point
711          col - number of colors needed in one direction for single component problem
712 
713   */
714   M   = dd->M;
715   N   = dd->N;
716   P   = dd->P;
717   dim = dd->dim;
718   dof = dd->w;
719   /* ierr = DMDAGetInfo(da,&dim,&M,&N,&P,0,0,0,&dof,0,0,0,0,0);CHKERRQ(ierr); */
720   ierr = DMDAGetCorners(da,0,0,0,&nx,&ny,&nz);CHKERRQ(ierr);
721   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
722   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
723   ierr = MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);CHKERRQ(ierr);
724   ierr = MatSetType(A,(MatType)ttype);CHKERRQ(ierr);
725   ierr = MatSetDM(A,da);CHKERRQ(ierr);
726   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
727   ierr = MatGetType(A,&Atype);CHKERRQ(ierr);
728   /*
729      We do not provide a getmatrix function in the DMDA operations because
730    the basic DMDA does not know about matrices. We think of DMDA as being more
731    more low-level than matrices. This is kind of cheating but, cause sometimes
732    we think of DMDA has higher level than matrices.
733 
734      We could switch based on Atype (or mtype), but we do not since the
735    specialized setting routines depend only the particular preallocation
736    details of the matrix, not the type itself.
737   */
738   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
739   if (!aij) {
740     ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
741   }
742   if (!aij) {
743     ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
744     if (!baij) {
745       ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
746     }
747     if (!baij) {
748       ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
749       if (!sbaij) {
750         ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
751       }
752     }
753   }
754   if (aij) {
755     if (dim == 1) {
756       if (dd->ofill) {
757         ierr = DMCreateMatrix_DA_1d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
758       } else {
759         ierr = DMCreateMatrix_DA_1d_MPIAIJ(da,A);CHKERRQ(ierr);
760       }
761     } else if (dim == 2) {
762       if (dd->ofill) {
763         ierr = DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
764       } else {
765         ierr = DMCreateMatrix_DA_2d_MPIAIJ(da,A);CHKERRQ(ierr);
766       }
767     } else if (dim == 3) {
768       if (dd->ofill) {
769         ierr = DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
770       } else {
771         ierr = DMCreateMatrix_DA_3d_MPIAIJ(da,A);CHKERRQ(ierr);
772       }
773     }
774   } else if (baij) {
775     if (dim == 2) {
776       ierr = DMCreateMatrix_DA_2d_MPIBAIJ(da,A);CHKERRQ(ierr);
777     } else if (dim == 3) {
778       ierr = DMCreateMatrix_DA_3d_MPIBAIJ(da,A);CHKERRQ(ierr);
779     } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
780   } else if (sbaij) {
781     if (dim == 2) {
782       ierr = DMCreateMatrix_DA_2d_MPISBAIJ(da,A);CHKERRQ(ierr);
783     } else if (dim == 3) {
784       ierr = DMCreateMatrix_DA_3d_MPISBAIJ(da,A);CHKERRQ(ierr);
785     } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
786   } else {
787     ISLocalToGlobalMapping ltog,ltogb;
788     ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
789     ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
790     ierr = MatSetUp(A);CHKERRQ(ierr);
791     ierr = MatSetLocalToGlobalMapping(A,ltog,ltog);CHKERRQ(ierr);
792     ierr = MatSetLocalToGlobalMappingBlock(A,ltogb,ltogb);CHKERRQ(ierr);
793   }
794   ierr = DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);CHKERRQ(ierr);
795   ierr = MatSetStencil(A,dim,dims,starts,dof);CHKERRQ(ierr);
796   ierr = MatSetDM(A,da);CHKERRQ(ierr);
797   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
798   if (size > 1) {
799     /* change viewer to display matrix in natural ordering */
800     ierr = MatShellSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA);CHKERRQ(ierr);
801     ierr = MatShellSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA);CHKERRQ(ierr);
802   }
803   *J = A;
804   PetscFunctionReturn(0);
805 }
806 
807 /* ---------------------------------------------------------------------------------*/
808 #undef __FUNCT__
809 #define __FUNCT__ "DMCreateMatrix_DA_2d_MPIAIJ"
810 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J)
811 {
812   PetscErrorCode         ierr;
813   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p;
814   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
815   MPI_Comm               comm;
816   PetscScalar            *values;
817   DMDABoundaryType       bx,by;
818   ISLocalToGlobalMapping ltog,ltogb;
819   DMDAStencilType        st;
820 
821   PetscFunctionBegin;
822   /*
823          nc - number of components per grid point
824          col - number of colors needed in one direction for single component problem
825 
826   */
827   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
828   col  = 2*s + 1;
829   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
830   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
831   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
832 
833   ierr = PetscMalloc2(nc,PetscInt,&rows,col*col*nc*nc,PetscInt,&cols);CHKERRQ(ierr);
834   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
835   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
836 
837   /* determine the matrix preallocation information */
838   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
839   for (i=xs; i<xs+nx; i++) {
840 
841     pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
842     pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
843 
844     for (j=ys; j<ys+ny; j++) {
845       slot = i - gxs + gnx*(j - gys);
846 
847       lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
848       lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
849 
850       cnt = 0;
851       for (k=0; k<nc; k++) {
852         for (l=lstart; l<lend+1; l++) {
853           for (p=pstart; p<pend+1; p++) {
854             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
855               cols[cnt++] = k + nc*(slot + gnx*l + p);
856             }
857           }
858         }
859         rows[k] = k + nc*(slot);
860       }
861       ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
862     }
863   }
864   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
865   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
866   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
867   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
868 
869   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
870   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
871 
872   /*
873     For each node in the grid: we get the neighbors in the local (on processor ordering
874     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
875     PETSc ordering.
876   */
877   if (!da->prealloc_only) {
878     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
879     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
880     for (i=xs; i<xs+nx; i++) {
881 
882       pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
883       pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
884 
885       for (j=ys; j<ys+ny; j++) {
886         slot = i - gxs + gnx*(j - gys);
887 
888         lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
889         lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
890 
891         cnt = 0;
892         for (k=0; k<nc; k++) {
893           for (l=lstart; l<lend+1; l++) {
894             for (p=pstart; p<pend+1; p++) {
895               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
896                 cols[cnt++] = k + nc*(slot + gnx*l + p);
897               }
898             }
899           }
900           rows[k] = k + nc*(slot);
901         }
902         ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
903       }
904     }
905     ierr = PetscFree(values);CHKERRQ(ierr);
906     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
907     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
908   }
909   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
910   PetscFunctionReturn(0);
911 }
912 
913 #undef __FUNCT__
914 #define __FUNCT__ "DMCreateMatrix_DA_2d_MPIAIJ_Fill"
915 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)
916 {
917   PetscErrorCode         ierr;
918   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
919   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,l,p;
920   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
921   DM_DA                  *dd = (DM_DA*)da->data;
922   PetscInt               ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
923   MPI_Comm               comm;
924   PetscScalar            *values;
925   DMDABoundaryType       bx,by;
926   ISLocalToGlobalMapping ltog,ltogb;
927   DMDAStencilType        st;
928 
929   PetscFunctionBegin;
930   /*
931          nc - number of components per grid point
932          col - number of colors needed in one direction for single component problem
933 
934   */
935   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
936   col  = 2*s + 1;
937   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
938   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
939   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
940 
941   ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
942   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
943   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
944 
945   /* determine the matrix preallocation information */
946   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
947   for (i=xs; i<xs+nx; i++) {
948 
949     pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
950     pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
951 
952     for (j=ys; j<ys+ny; j++) {
953       slot = i - gxs + gnx*(j - gys);
954 
955       lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
956       lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
957 
958       for (k=0; k<nc; k++) {
959         cnt = 0;
960         for (l=lstart; l<lend+1; l++) {
961           for (p=pstart; p<pend+1; p++) {
962             if (l || p) {
963               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
964                 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
965               }
966             } else {
967               if (dfill) {
968                 for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
969               } else {
970                 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
971               }
972             }
973           }
974         }
975         row  = k + nc*(slot);
976         ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
977       }
978     }
979   }
980   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
981   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
982   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
983   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
984   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
985 
986   /*
987     For each node in the grid: we get the neighbors in the local (on processor ordering
988     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
989     PETSc ordering.
990   */
991   if (!da->prealloc_only) {
992     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
993     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
994     for (i=xs; i<xs+nx; i++) {
995 
996       pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
997       pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
998 
999       for (j=ys; j<ys+ny; j++) {
1000         slot = i - gxs + gnx*(j - gys);
1001 
1002         lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1003         lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1004 
1005         for (k=0; k<nc; k++) {
1006           cnt = 0;
1007           for (l=lstart; l<lend+1; l++) {
1008             for (p=pstart; p<pend+1; p++) {
1009               if (l || p) {
1010                 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
1011                   for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
1012                 }
1013               } else {
1014                 if (dfill) {
1015                   for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1016                 } else {
1017                   for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1018                 }
1019               }
1020             }
1021           }
1022           row  = k + nc*(slot);
1023           ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1024         }
1025       }
1026     }
1027     ierr = PetscFree(values);CHKERRQ(ierr);
1028     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1029     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1030   }
1031   ierr = PetscFree(cols);CHKERRQ(ierr);
1032   PetscFunctionReturn(0);
1033 }
1034 
1035 /* ---------------------------------------------------------------------------------*/
1036 
1037 #undef __FUNCT__
1038 #define __FUNCT__ "DMCreateMatrix_DA_3d_MPIAIJ"
1039 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J)
1040 {
1041   PetscErrorCode         ierr;
1042   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1043   PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
1044   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1045   MPI_Comm               comm;
1046   PetscScalar            *values;
1047   DMDABoundaryType       bx,by,bz;
1048   ISLocalToGlobalMapping ltog,ltogb;
1049   DMDAStencilType        st;
1050 
1051   PetscFunctionBegin;
1052   /*
1053          nc - number of components per grid point
1054          col - number of colors needed in one direction for single component problem
1055 
1056   */
1057   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1058   col  = 2*s + 1;
1059 
1060   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1061   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1062   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1063 
1064   ierr = PetscMalloc2(nc,PetscInt,&rows,col*col*col*nc*nc,PetscInt,&cols);CHKERRQ(ierr);
1065   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1066   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1067 
1068   /* determine the matrix preallocation information */
1069   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1070   for (i=xs; i<xs+nx; i++) {
1071     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1072     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1073     for (j=ys; j<ys+ny; j++) {
1074       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1075       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1076       for (k=zs; k<zs+nz; k++) {
1077         kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1078         kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1079 
1080         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1081 
1082         cnt = 0;
1083         for (l=0; l<nc; l++) {
1084           for (ii=istart; ii<iend+1; ii++) {
1085             for (jj=jstart; jj<jend+1; jj++) {
1086               for (kk=kstart; kk<kend+1; kk++) {
1087                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1088                   cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1089                 }
1090               }
1091             }
1092           }
1093           rows[l] = l + nc*(slot);
1094         }
1095         ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1096       }
1097     }
1098   }
1099   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1100   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1101   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1102   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1103   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1104   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1105 
1106   /*
1107     For each node in the grid: we get the neighbors in the local (on processor ordering
1108     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1109     PETSc ordering.
1110   */
1111   if (!da->prealloc_only) {
1112     ierr = PetscMalloc(col*col*col*nc*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1113     ierr = PetscMemzero(values,col*col*col*nc*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1114     for (i=xs; i<xs+nx; i++) {
1115       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1116       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1117       for (j=ys; j<ys+ny; j++) {
1118         jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1119         jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1120         for (k=zs; k<zs+nz; k++) {
1121           kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1122           kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1123 
1124           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1125 
1126           cnt = 0;
1127           for (l=0; l<nc; l++) {
1128             for (ii=istart; ii<iend+1; ii++) {
1129               for (jj=jstart; jj<jend+1; jj++) {
1130                 for (kk=kstart; kk<kend+1; kk++) {
1131                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1132                     cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1133                   }
1134                 }
1135               }
1136             }
1137             rows[l] = l + nc*(slot);
1138           }
1139           ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1140         }
1141       }
1142     }
1143     ierr = PetscFree(values);CHKERRQ(ierr);
1144     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1145     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1146   }
1147   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1148   PetscFunctionReturn(0);
1149 }
1150 
1151 /* ---------------------------------------------------------------------------------*/
1152 
1153 #undef __FUNCT__
1154 #define __FUNCT__ "DMCreateMatrix_DA_1d_MPIAIJ_Fill"
1155 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J)
1156 {
1157   PetscErrorCode         ierr;
1158   DM_DA                  *dd = (DM_DA*)da->data;
1159   PetscInt               xs,nx,i,j,gxs,gnx,row,k,l;
1160   PetscInt               m,dim,s,*cols = NULL,nc,col,cnt,*ocols;
1161   PetscInt               *ofill = dd->ofill,*dfill = dd->dfill;
1162   PetscScalar            *values;
1163   DMDABoundaryType       bx;
1164   ISLocalToGlobalMapping ltog,ltogb;
1165   PetscMPIInt            rank,size;
1166 
1167   PetscFunctionBegin;
1168   if (dd->bx == DMDA_BOUNDARY_PERIODIC) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"With fill provided not implemented with periodic boundary conditions");
1169   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)da),&rank);CHKERRQ(ierr);
1170   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRQ(ierr);
1171 
1172   /*
1173          nc - number of components per grid point
1174          col - number of colors needed in one direction for single component problem
1175 
1176   */
1177   ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
1178   col  = 2*s + 1;
1179 
1180   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
1181   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
1182 
1183   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1184   ierr = PetscMalloc2(nx*nc,PetscInt,&cols,nx*nc,PetscInt,&ocols);CHKERRQ(ierr);
1185   ierr = PetscMemzero(cols,nx*nc*sizeof(PetscInt));CHKERRQ(ierr);
1186   ierr = PetscMemzero(ocols,nx*nc*sizeof(PetscInt));CHKERRQ(ierr);
1187 
1188   if (nx < 2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Need at least two grid points per process");
1189   /*
1190         note should be smaller for first and last process with no periodic
1191         does not handle dfill
1192   */
1193   cnt = 0;
1194   /* coupling with process to the left */
1195   for (i=0; i<s; i++) {
1196     for (j=0; j<nc; j++) {
1197       ocols[cnt] = ((!rank) ? 0 : (s - i)*(ofill[j+1] - ofill[j]));
1198       cols[cnt]  = dfill[j+1] - dfill[j] + (s + i)*(ofill[j+1] - ofill[j]);
1199       cnt++;
1200     }
1201   }
1202   for (i=s; i<nx-s; i++) {
1203     for (j=0; j<nc; j++) {
1204       cols[cnt] = dfill[j+1] - dfill[j] + 2*s*(ofill[j+1] - ofill[j]);
1205       cnt++;
1206     }
1207   }
1208   /* coupling with process to the right */
1209   for (i=nx-s; i<nx; i++) {
1210     for (j=0; j<nc; j++) {
1211       ocols[cnt] = ((rank == (size-1)) ? 0 : (i - nx + s + 1)*(ofill[j+1] - ofill[j]));
1212       cols[cnt]  = dfill[j+1] - dfill[j] + (s + nx - i - 1)*(ofill[j+1] - ofill[j]);
1213       cnt++;
1214     }
1215   }
1216 
1217   ierr = MatSeqAIJSetPreallocation(J,0,cols);CHKERRQ(ierr);
1218   ierr = MatMPIAIJSetPreallocation(J,0,cols,0,ocols);CHKERRQ(ierr);
1219   ierr = PetscFree2(cols,ocols);CHKERRQ(ierr);
1220 
1221   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1222   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1223   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1224   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1225 
1226   /*
1227     For each node in the grid: we get the neighbors in the local (on processor ordering
1228     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1229     PETSc ordering.
1230   */
1231   if (!da->prealloc_only) {
1232     ierr = PetscMalloc(col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1233     ierr = PetscMalloc(col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1234     ierr = PetscMemzero(values,col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1235 
1236     row = xs*nc;
1237     /* coupling with process to the left */
1238     for (i=xs; i<xs+s; i++) {
1239       for (j=0; j<nc; j++) {
1240         cnt = 0;
1241         if (rank) {
1242           for (l=0; l<s; l++) {
1243             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1244           }
1245         }
1246         if (dfill) {
1247           for (k=dfill[j]; k<dfill[j+1]; k++) {
1248             cols[cnt++] = i*nc + dfill[k];
1249           }
1250         } else {
1251           for (k=0; k<nc; k++) {
1252             cols[cnt++] = i*nc + k;
1253           }
1254         }
1255         for (l=0; l<s; l++) {
1256           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1257         }
1258         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1259         row++;
1260       }
1261     }
1262     for (i=xs+s; i<xs+nx-s; i++) {
1263       for (j=0; j<nc; j++) {
1264         cnt = 0;
1265         for (l=0; l<s; l++) {
1266           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1267         }
1268         if (dfill) {
1269           for (k=dfill[j]; k<dfill[j+1]; k++) {
1270             cols[cnt++] = i*nc + dfill[k];
1271           }
1272         } else {
1273           for (k=0; k<nc; k++) {
1274             cols[cnt++] = i*nc + k;
1275           }
1276         }
1277         for (l=0; l<s; l++) {
1278           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1279         }
1280         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1281         row++;
1282       }
1283     }
1284     /* coupling with process to the right */
1285     for (i=xs+nx-s; i<xs+nx; i++) {
1286       for (j=0; j<nc; j++) {
1287         cnt = 0;
1288         for (l=0; l<s; l++) {
1289           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1290         }
1291         if (dfill) {
1292           for (k=dfill[j]; k<dfill[j+1]; k++) {
1293             cols[cnt++] = i*nc + dfill[k];
1294           }
1295         } else {
1296           for (k=0; k<nc; k++) {
1297             cols[cnt++] = i*nc + k;
1298           }
1299         }
1300         if (rank < size-1) {
1301           for (l=0; l<s; l++) {
1302             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1303           }
1304         }
1305         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1306         row++;
1307       }
1308     }
1309     ierr = PetscFree(values);CHKERRQ(ierr);
1310     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1311     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1312     ierr = PetscFree(cols);CHKERRQ(ierr);
1313   }
1314   PetscFunctionReturn(0);
1315 }
1316 
1317 /* ---------------------------------------------------------------------------------*/
1318 
1319 #undef __FUNCT__
1320 #define __FUNCT__ "DMCreateMatrix_DA_1d_MPIAIJ"
1321 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J)
1322 {
1323   PetscErrorCode         ierr;
1324   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1325   PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
1326   PetscInt               istart,iend;
1327   PetscScalar            *values;
1328   DMDABoundaryType       bx;
1329   ISLocalToGlobalMapping ltog,ltogb;
1330 
1331   PetscFunctionBegin;
1332   /*
1333          nc - number of components per grid point
1334          col - number of colors needed in one direction for single component problem
1335 
1336   */
1337   ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
1338   col  = 2*s + 1;
1339 
1340   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
1341   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
1342 
1343   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1344   ierr = MatSeqAIJSetPreallocation(J,col*nc,0);CHKERRQ(ierr);
1345   ierr = MatMPIAIJSetPreallocation(J,col*nc,0,col*nc,0);CHKERRQ(ierr);
1346 
1347   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1348   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1349   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1350   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1351 
1352   /*
1353     For each node in the grid: we get the neighbors in the local (on processor ordering
1354     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1355     PETSc ordering.
1356   */
1357   if (!da->prealloc_only) {
1358     ierr = PetscMalloc2(nc,PetscInt,&rows,col*nc*nc,PetscInt,&cols);CHKERRQ(ierr);
1359     ierr = PetscMalloc(col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1360     ierr = PetscMemzero(values,col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1361     for (i=xs; i<xs+nx; i++) {
1362       istart = PetscMax(-s,gxs - i);
1363       iend   = PetscMin(s,gxs + gnx - i - 1);
1364       slot   = i - gxs;
1365 
1366       cnt = 0;
1367       for (l=0; l<nc; l++) {
1368         for (i1=istart; i1<iend+1; i1++) {
1369           cols[cnt++] = l + nc*(slot + i1);
1370         }
1371         rows[l] = l + nc*(slot);
1372       }
1373       ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1374     }
1375     ierr = PetscFree(values);CHKERRQ(ierr);
1376     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1377     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1378     ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1379   }
1380   PetscFunctionReturn(0);
1381 }
1382 
1383 #undef __FUNCT__
1384 #define __FUNCT__ "DMCreateMatrix_DA_2d_MPIBAIJ"
1385 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
1386 {
1387   PetscErrorCode         ierr;
1388   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1389   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1390   PetscInt               istart,iend,jstart,jend,ii,jj;
1391   MPI_Comm               comm;
1392   PetscScalar            *values;
1393   DMDABoundaryType       bx,by;
1394   DMDAStencilType        st;
1395   ISLocalToGlobalMapping ltog,ltogb;
1396 
1397   PetscFunctionBegin;
1398   /*
1399      nc - number of components per grid point
1400      col - number of colors needed in one direction for single component problem
1401   */
1402   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1403   col  = 2*s + 1;
1404 
1405   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1406   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1407   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1408 
1409   ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1410 
1411   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1412   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1413 
1414   /* determine the matrix preallocation information */
1415   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1416   for (i=xs; i<xs+nx; i++) {
1417     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1418     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1419     for (j=ys; j<ys+ny; j++) {
1420       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1421       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1422       slot   = i - gxs + gnx*(j - gys);
1423 
1424       /* Find block columns in block row */
1425       cnt = 0;
1426       for (ii=istart; ii<iend+1; ii++) {
1427         for (jj=jstart; jj<jend+1; jj++) {
1428           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1429             cols[cnt++] = slot + ii + gnx*jj;
1430           }
1431         }
1432       }
1433       ierr = MatPreallocateSetLocal(ltogb,1,&slot,ltogb,cnt,cols,dnz,onz);CHKERRQ(ierr);
1434     }
1435   }
1436   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1437   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1438   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1439 
1440   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1441   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1442 
1443   /*
1444     For each node in the grid: we get the neighbors in the local (on processor ordering
1445     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1446     PETSc ordering.
1447   */
1448   if (!da->prealloc_only) {
1449     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1450     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1451     for (i=xs; i<xs+nx; i++) {
1452       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1453       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1454       for (j=ys; j<ys+ny; j++) {
1455         jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1456         jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1457         slot = i - gxs + gnx*(j - gys);
1458         cnt  = 0;
1459         for (ii=istart; ii<iend+1; ii++) {
1460           for (jj=jstart; jj<jend+1; jj++) {
1461             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1462               cols[cnt++] = slot + ii + gnx*jj;
1463             }
1464           }
1465         }
1466         ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1467       }
1468     }
1469     ierr = PetscFree(values);CHKERRQ(ierr);
1470     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1471     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1472   }
1473   ierr = PetscFree(cols);CHKERRQ(ierr);
1474   PetscFunctionReturn(0);
1475 }
1476 
1477 #undef __FUNCT__
1478 #define __FUNCT__ "DMCreateMatrix_DA_3d_MPIBAIJ"
1479 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
1480 {
1481   PetscErrorCode         ierr;
1482   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1483   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1484   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1485   MPI_Comm               comm;
1486   PetscScalar            *values;
1487   DMDABoundaryType       bx,by,bz;
1488   DMDAStencilType        st;
1489   ISLocalToGlobalMapping ltog,ltogb;
1490 
1491   PetscFunctionBegin;
1492   /*
1493          nc - number of components per grid point
1494          col - number of colors needed in one direction for single component problem
1495 
1496   */
1497   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1498   col  = 2*s + 1;
1499 
1500   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1501   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1502   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1503 
1504   ierr = PetscMalloc(col*col*col*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1505 
1506   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1507   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1508 
1509   /* determine the matrix preallocation information */
1510   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1511   for (i=xs; i<xs+nx; i++) {
1512     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1513     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1514     for (j=ys; j<ys+ny; j++) {
1515       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1516       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1517       for (k=zs; k<zs+nz; k++) {
1518         kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1519         kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1520 
1521         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1522 
1523         /* Find block columns in block row */
1524         cnt = 0;
1525         for (ii=istart; ii<iend+1; ii++) {
1526           for (jj=jstart; jj<jend+1; jj++) {
1527             for (kk=kstart; kk<kend+1; kk++) {
1528               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1529                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1530               }
1531             }
1532           }
1533         }
1534         ierr = MatPreallocateSetLocal(ltogb,1,&slot,ltogb,cnt,cols,dnz,onz);CHKERRQ(ierr);
1535       }
1536     }
1537   }
1538   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1539   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1540   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1541 
1542   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1543   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1544 
1545   /*
1546     For each node in the grid: we get the neighbors in the local (on processor ordering
1547     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1548     PETSc ordering.
1549   */
1550   if (!da->prealloc_only) {
1551     ierr = PetscMalloc(col*col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1552     ierr = PetscMemzero(values,col*col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1553     for (i=xs; i<xs+nx; i++) {
1554       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1555       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1556       for (j=ys; j<ys+ny; j++) {
1557         jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1558         jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1559         for (k=zs; k<zs+nz; k++) {
1560           kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1561           kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1562 
1563           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1564 
1565           cnt = 0;
1566           for (ii=istart; ii<iend+1; ii++) {
1567             for (jj=jstart; jj<jend+1; jj++) {
1568               for (kk=kstart; kk<kend+1; kk++) {
1569                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1570                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1571                 }
1572               }
1573             }
1574           }
1575           ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1576         }
1577       }
1578     }
1579     ierr = PetscFree(values);CHKERRQ(ierr);
1580     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1581     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1582   }
1583   ierr = PetscFree(cols);CHKERRQ(ierr);
1584   PetscFunctionReturn(0);
1585 }
1586 
1587 #undef __FUNCT__
1588 #define __FUNCT__ "L2GFilterUpperTriangular"
1589 /*
1590   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
1591   identify in the local ordering with periodic domain.
1592 */
1593 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[])
1594 {
1595   PetscErrorCode ierr;
1596   PetscInt       i,n;
1597 
1598   PetscFunctionBegin;
1599   ierr = ISLocalToGlobalMappingApply(ltog,1,row,row);CHKERRQ(ierr);
1600   ierr = ISLocalToGlobalMappingApply(ltog,*cnt,col,col);CHKERRQ(ierr);
1601   for (i=0,n=0; i<*cnt; i++) {
1602     if (col[i] >= *row) col[n++] = col[i];
1603   }
1604   *cnt = n;
1605   PetscFunctionReturn(0);
1606 }
1607 
1608 #undef __FUNCT__
1609 #define __FUNCT__ "DMCreateMatrix_DA_2d_MPISBAIJ"
1610 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J)
1611 {
1612   PetscErrorCode         ierr;
1613   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1614   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1615   PetscInt               istart,iend,jstart,jend,ii,jj;
1616   MPI_Comm               comm;
1617   PetscScalar            *values;
1618   DMDABoundaryType       bx,by;
1619   DMDAStencilType        st;
1620   ISLocalToGlobalMapping ltog,ltogb;
1621 
1622   PetscFunctionBegin;
1623   /*
1624      nc - number of components per grid point
1625      col - number of colors needed in one direction for single component problem
1626   */
1627   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1628   col  = 2*s + 1;
1629 
1630   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1631   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1632   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1633 
1634   ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1635 
1636   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1637   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1638 
1639   /* determine the matrix preallocation information */
1640   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1641   for (i=xs; i<xs+nx; i++) {
1642     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1643     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1644     for (j=ys; j<ys+ny; j++) {
1645       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1646       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1647       slot   = i - gxs + gnx*(j - gys);
1648 
1649       /* Find block columns in block row */
1650       cnt = 0;
1651       for (ii=istart; ii<iend+1; ii++) {
1652         for (jj=jstart; jj<jend+1; jj++) {
1653           if (st == DMDA_STENCIL_BOX || !ii || !jj) {
1654             cols[cnt++] = slot + ii + gnx*jj;
1655           }
1656         }
1657       }
1658       ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1659       ierr = MatPreallocateSymmetricSet(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
1660     }
1661   }
1662   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1663   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1664   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1665 
1666   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1667   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1668 
1669   /*
1670     For each node in the grid: we get the neighbors in the local (on processor ordering
1671     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1672     PETSc ordering.
1673   */
1674   if (!da->prealloc_only) {
1675     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1676     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1677     for (i=xs; i<xs+nx; i++) {
1678       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1679       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1680       for (j=ys; j<ys+ny; j++) {
1681         jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1682         jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1683         slot   = i - gxs + gnx*(j - gys);
1684 
1685         /* Find block columns in block row */
1686         cnt = 0;
1687         for (ii=istart; ii<iend+1; ii++) {
1688           for (jj=jstart; jj<jend+1; jj++) {
1689             if (st == DMDA_STENCIL_BOX || !ii || !jj) {
1690               cols[cnt++] = slot + ii + gnx*jj;
1691             }
1692           }
1693         }
1694         ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1695         ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1696       }
1697     }
1698     ierr = PetscFree(values);CHKERRQ(ierr);
1699     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1700     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1701   }
1702   ierr = PetscFree(cols);CHKERRQ(ierr);
1703   PetscFunctionReturn(0);
1704 }
1705 
1706 #undef __FUNCT__
1707 #define __FUNCT__ "DMCreateMatrix_DA_3d_MPISBAIJ"
1708 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J)
1709 {
1710   PetscErrorCode         ierr;
1711   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1712   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1713   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1714   MPI_Comm               comm;
1715   PetscScalar            *values;
1716   DMDABoundaryType       bx,by,bz;
1717   DMDAStencilType        st;
1718   ISLocalToGlobalMapping ltog,ltogb;
1719 
1720   PetscFunctionBegin;
1721   /*
1722      nc - number of components per grid point
1723      col - number of colors needed in one direction for single component problem
1724   */
1725   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1726   col  = 2*s + 1;
1727 
1728   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1729   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1730   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1731 
1732   /* create the matrix */
1733   ierr = PetscMalloc(col*col*col*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1734 
1735   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1736   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1737 
1738   /* determine the matrix preallocation information */
1739   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1740   for (i=xs; i<xs+nx; i++) {
1741     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1742     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1743     for (j=ys; j<ys+ny; j++) {
1744       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1745       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1746       for (k=zs; k<zs+nz; k++) {
1747         kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1748         kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1749 
1750         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1751 
1752         /* Find block columns in block row */
1753         cnt = 0;
1754         for (ii=istart; ii<iend+1; ii++) {
1755           for (jj=jstart; jj<jend+1; jj++) {
1756             for (kk=kstart; kk<kend+1; kk++) {
1757               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
1758                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1759               }
1760             }
1761           }
1762         }
1763         ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1764         ierr = MatPreallocateSymmetricSet(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
1765       }
1766     }
1767   }
1768   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1769   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1770   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1771 
1772   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1773   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1774 
1775   /*
1776     For each node in the grid: we get the neighbors in the local (on processor ordering
1777     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1778     PETSc ordering.
1779   */
1780   if (!da->prealloc_only) {
1781     ierr = PetscMalloc(col*col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1782     ierr = PetscMemzero(values,col*col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1783     for (i=xs; i<xs+nx; i++) {
1784       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1785       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1786       for (j=ys; j<ys+ny; j++) {
1787         jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1788         jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1789         for (k=zs; k<zs+nz; k++) {
1790           kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1791           kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1792 
1793           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1794 
1795           cnt = 0;
1796           for (ii=istart; ii<iend+1; ii++) {
1797             for (jj=jstart; jj<jend+1; jj++) {
1798               for (kk=kstart; kk<kend+1; kk++) {
1799                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
1800                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1801                 }
1802               }
1803             }
1804           }
1805           ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1806           ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1807         }
1808       }
1809     }
1810     ierr = PetscFree(values);CHKERRQ(ierr);
1811     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1812     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1813   }
1814   ierr = PetscFree(cols);CHKERRQ(ierr);
1815   PetscFunctionReturn(0);
1816 }
1817 
1818 /* ---------------------------------------------------------------------------------*/
1819 
1820 #undef __FUNCT__
1821 #define __FUNCT__ "DMCreateMatrix_DA_3d_MPIAIJ_Fill"
1822 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J)
1823 {
1824   PetscErrorCode         ierr;
1825   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1826   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,l,p,*dnz,*onz;
1827   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1828   DM_DA                  *dd = (DM_DA*)da->data;
1829   PetscInt               ifill_col,*dfill = dd->dfill,*ofill = dd->ofill;
1830   MPI_Comm               comm;
1831   PetscScalar            *values;
1832   DMDABoundaryType       bx,by,bz;
1833   ISLocalToGlobalMapping ltog,ltogb;
1834   DMDAStencilType        st;
1835 
1836   PetscFunctionBegin;
1837   /*
1838          nc - number of components per grid point
1839          col - number of colors needed in one direction for single component problem
1840 
1841   */
1842   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1843   col  = 2*s + 1;
1844   if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
1845                  by 2*stencil_width + 1\n");
1846   if (by == DMDA_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
1847                  by 2*stencil_width + 1\n");
1848   if (bz == DMDA_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
1849                  by 2*stencil_width + 1\n");
1850 
1851   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1852   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1853   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1854 
1855   ierr = PetscMalloc(col*col*col*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1856   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1857   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1858 
1859   /* determine the matrix preallocation information */
1860   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1861 
1862 
1863   for (i=xs; i<xs+nx; i++) {
1864     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1865     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1866     for (j=ys; j<ys+ny; j++) {
1867       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1868       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1869       for (k=zs; k<zs+nz; k++) {
1870         kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1871         kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1872 
1873         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1874 
1875         for (l=0; l<nc; l++) {
1876           cnt = 0;
1877           for (ii=istart; ii<iend+1; ii++) {
1878             for (jj=jstart; jj<jend+1; jj++) {
1879               for (kk=kstart; kk<kend+1; kk++) {
1880                 if (ii || jj || kk) {
1881                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1882                     for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1883                   }
1884                 } else {
1885                   if (dfill) {
1886                     for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1887                   } else {
1888                     for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1889                   }
1890                 }
1891               }
1892             }
1893           }
1894           row  = l + nc*(slot);
1895           ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1896         }
1897       }
1898     }
1899   }
1900   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1901   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1902   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1903   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1904   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1905 
1906   /*
1907     For each node in the grid: we get the neighbors in the local (on processor ordering
1908     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1909     PETSc ordering.
1910   */
1911   if (!da->prealloc_only) {
1912     ierr = PetscMalloc(col*col*col*nc*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1913     ierr = PetscMemzero(values,col*col*col*nc*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1914     for (i=xs; i<xs+nx; i++) {
1915       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1916       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1917       for (j=ys; j<ys+ny; j++) {
1918         jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1919         jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1920         for (k=zs; k<zs+nz; k++) {
1921           kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1922           kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1923 
1924           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1925 
1926           for (l=0; l<nc; l++) {
1927             cnt = 0;
1928             for (ii=istart; ii<iend+1; ii++) {
1929               for (jj=jstart; jj<jend+1; jj++) {
1930                 for (kk=kstart; kk<kend+1; kk++) {
1931                   if (ii || jj || kk) {
1932                     if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1933                       for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1934                     }
1935                   } else {
1936                     if (dfill) {
1937                       for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1938                     } else {
1939                       for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1940                     }
1941                   }
1942                 }
1943               }
1944             }
1945             row  = l + nc*(slot);
1946             ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1947           }
1948         }
1949       }
1950     }
1951     ierr = PetscFree(values);CHKERRQ(ierr);
1952     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1953     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1954   }
1955   ierr = PetscFree(cols);CHKERRQ(ierr);
1956   PetscFunctionReturn(0);
1957 }
1958