xref: /petsc/src/dm/impls/da/fdda.c (revision 8cbdbec6f8317ddf7886f91eb9c6bd083b543c50)
1 
2 #include <petsc-private/daimpl.h> /*I      "petscdmda.h"     I*/
3 #include <petscmat.h>         /*I      "petscmat.h"    I*/
4 #include <petsc-private/matimpl.h>
5 
6 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring *);
7 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring *);
8 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring *);
9 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring *);
10 
11 /*
12    For ghost i that may be negative or greater than the upper bound this
13   maps it into the 0:m-1 range using periodicity
14 */
15 #define SetInRange(i,m) ((i < 0) ? m+i:((i >= m) ? i-m:i))
16 
17 #undef __FUNCT__
18 #define __FUNCT__ "DMDASetBlockFills_Private"
19 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill,PetscInt w,PetscInt **rfill)
20 {
21   PetscErrorCode ierr;
22   PetscInt       i,j,nz,*fill;
23 
24   PetscFunctionBegin;
25   if (!dfill) PetscFunctionReturn(0);
26 
27   /* count number nonzeros */
28   nz = 0;
29   for (i=0; i<w; i++) {
30     for (j=0; j<w; j++) {
31       if (dfill[w*i+j]) nz++;
32     }
33   }
34   ierr = PetscMalloc((nz + w + 1)*sizeof(PetscInt),&fill);CHKERRQ(ierr);
35   /* construct modified CSR storage of nonzero structure */
36   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
37    so fill[1] - fill[0] gives number of nonzeros in first row etc */
38   nz = w + 1;
39   for (i=0; i<w; i++) {
40     fill[i] = nz;
41     for (j=0; j<w; j++) {
42       if (dfill[w*i+j]) {
43 	fill[nz] = j;
44 	nz++;
45       }
46     }
47   }
48   fill[w] = nz;
49 
50   *rfill = fill;
51   PetscFunctionReturn(0);
52 }
53 
54 #undef __FUNCT__
55 #define __FUNCT__ "DMDASetBlockFills"
56 /*@
57     DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
58     of the matrix returned by DMCreateMatrix().
59 
60     Logically Collective on DMDA
61 
62     Input Parameter:
63 +   da - the distributed array
64 .   dfill - the fill pattern in the diagonal block (may be PETSC_NULL, means use dense block)
65 -   ofill - the fill pattern in the off-diagonal blocks
66 
67 
68     Level: developer
69 
70     Notes: This only makes sense when you are doing multicomponent problems but using the
71        MPIAIJ matrix format
72 
73            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
74        representing coupling and 0 entries for missing coupling. For example
75 $             dfill[9] = {1, 0, 0,
76 $                         1, 1, 0,
77 $                         0, 1, 1}
78        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
79        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
80        diagonal block).
81 
82      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
83      can be represented in the dfill, ofill format
84 
85    Contributed by Glenn Hammond
86 
87 .seealso DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()
88 
89 @*/
90 PetscErrorCode  DMDASetBlockFills(DM da,const PetscInt *dfill,const PetscInt *ofill)
91 {
92   DM_DA          *dd = (DM_DA*)da->data;
93   PetscErrorCode ierr;
94   PetscInt       i,k,cnt = 1;
95 
96   PetscFunctionBegin;
97   ierr = DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);CHKERRQ(ierr);
98   ierr = DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);CHKERRQ(ierr);
99 
100   /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of
101    columns to the left with any nonzeros in them plus 1 */
102   ierr = PetscMalloc(dd->w*sizeof(PetscBool),&dd->ofillcols);CHKERRQ(ierr);
103   ierr = PetscMemzero(dd->ofillcols,dd->w*sizeof(PetscBool));CHKERRQ(ierr);
104   for (i=0; i<dd->w; i++) {
105     for (k=dd->ofill[i]; k<dd->ofill[i+1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
106   }
107   for (i=0; i<dd->w; i++) {
108     if (dd->ofillcols[i]) {
109       dd->ofillcols[i] = cnt++;
110     }
111   }
112   PetscFunctionReturn(0);
113 }
114 
115 
116 #undef __FUNCT__
117 #define __FUNCT__ "DMCreateColoring_DA"
118 PetscErrorCode  DMCreateColoring_DA(DM da,ISColoringType ctype,MatType mtype,ISColoring *coloring)
119 {
120   PetscErrorCode   ierr;
121   PetscInt         dim,m,n,p,nc;
122   DMDABoundaryType bx,by,bz;
123   MPI_Comm         comm;
124   PetscMPIInt      size;
125   PetscBool        isBAIJ;
126   DM_DA            *dd = (DM_DA*)da->data;
127 
128   PetscFunctionBegin;
129   /*
130                                   m
131           ------------------------------------------------------
132          |                                                     |
133          |                                                     |
134          |               ----------------------                |
135          |               |                    |                |
136       n  |           yn  |                    |                |
137          |               |                    |                |
138          |               .---------------------                |
139          |             (xs,ys)     xn                          |
140          |            .                                        |
141          |         (gxs,gys)                                   |
142          |                                                     |
143           -----------------------------------------------------
144   */
145 
146   /*
147          nc - number of components per grid point
148          col - number of colors needed in one direction for single component problem
149 
150   */
151   ierr = DMDAGetInfo(da,&dim,0,0,0,&m,&n,&p,&nc,0,&bx,&by,&bz,0);CHKERRQ(ierr);
152 
153   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
154   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
155   if (ctype == IS_COLORING_GHOSTED){
156     if (size == 1) {
157       ctype = IS_COLORING_GLOBAL;
158     } else if (dim > 1){
159       if ((m==1 && bx == DMDA_BOUNDARY_PERIODIC) || (n==1 && by == DMDA_BOUNDARY_PERIODIC) || (p==1 && bz == DMDA_BOUNDARY_PERIODIC)){
160         SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"IS_COLORING_GHOSTED cannot be used for periodic boundary condition having both ends of the domain  on the same process");
161       }
162     }
163   }
164 
165   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
166      matrices is for the blocks, not the individual matrix elements  */
167   ierr = PetscStrcmp(mtype,MATBAIJ,&isBAIJ);CHKERRQ(ierr);
168   if (!isBAIJ) {ierr = PetscStrcmp(mtype,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);}
169   if (!isBAIJ) {ierr = PetscStrcmp(mtype,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);}
170   if (isBAIJ) {
171     dd->w = 1;
172     dd->xs = dd->xs/nc;
173     dd->xe = dd->xe/nc;
174     dd->Xs = dd->Xs/nc;
175     dd->Xe = dd->Xe/nc;
176   }
177 
178   /*
179      We do not provide a getcoloring function in the DMDA operations because
180    the basic DMDA does not know about matrices. We think of DMDA as being more
181    more low-level then matrices.
182   */
183   if (dim == 1) {
184     ierr = DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
185   } else if (dim == 2) {
186     ierr =  DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
187   } else if (dim == 3) {
188     ierr =  DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
189   } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
190   if (isBAIJ) {
191     dd->w = nc;
192     dd->xs = dd->xs*nc;
193     dd->xe = dd->xe*nc;
194     dd->Xs = dd->Xs*nc;
195     dd->Xe = dd->Xe*nc;
196   }
197   PetscFunctionReturn(0);
198 }
199 
200 /* ---------------------------------------------------------------------------------*/
201 
202 #undef __FUNCT__
203 #define __FUNCT__ "DMCreateColoring_DA_2d_MPIAIJ"
204 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
205 {
206   PetscErrorCode   ierr;
207   PetscInt         xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
208   PetscInt         ncolors;
209   MPI_Comm         comm;
210   DMDABoundaryType bx,by;
211   DMDAStencilType  st;
212   ISColoringValue  *colors;
213   DM_DA            *dd = (DM_DA*)da->data;
214 
215   PetscFunctionBegin;
216   /*
217          nc - number of components per grid point
218          col - number of colors needed in one direction for single component problem
219 
220   */
221   ierr = DMDAGetInfo(da,&dim,&m,&n,0,&M,&N,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
222   col    = 2*s + 1;
223   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
224   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
225   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
226 
227   /* special case as taught to us by Paul Hovland */
228   if (st == DMDA_STENCIL_STAR && s == 1) {
229     ierr = DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
230   } else {
231 
232     if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)) SETERRQ2(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X (%d) is divisible\n\
233                                                             by 2*stencil_width + 1 (%d)\n", m, col);
234     if (by == DMDA_BOUNDARY_PERIODIC && (n % col)) SETERRQ2(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y (%d) is divisible\n\
235                                                             by 2*stencil_width + 1 (%d)\n", n, col);
236     if (ctype == IS_COLORING_GLOBAL) {
237       if (!dd->localcoloring) {
238 	ierr = PetscMalloc(nc*nx*ny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
239 	ii = 0;
240 	for (j=ys; j<ys+ny; j++) {
241 	  for (i=xs; i<xs+nx; i++) {
242 	    for (k=0; k<nc; k++) {
243 	      colors[ii++] = k + nc*((i % col) + col*(j % col));
244 	    }
245 	  }
246 	}
247         ncolors = nc + nc*(col-1 + col*(col-1));
248 	ierr = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,&dd->localcoloring);CHKERRQ(ierr);
249       }
250       *coloring = dd->localcoloring;
251     } else if (ctype == IS_COLORING_GHOSTED) {
252       if (!dd->ghostedcoloring) {
253 	ierr = PetscMalloc(nc*gnx*gny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
254 	ii = 0;
255 	for (j=gys; j<gys+gny; j++) {
256 	  for (i=gxs; i<gxs+gnx; i++) {
257 	    for (k=0; k<nc; k++) {
258 	      /* the complicated stuff is to handle periodic boundaries */
259 	      colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
260 	    }
261 	  }
262 	}
263         ncolors = nc + nc*(col - 1 + col*(col-1));
264 	ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
265         /* PetscIntView(ncolors,(PetscInt *)colors,0); */
266 
267 	ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
268       }
269       *coloring = dd->ghostedcoloring;
270     } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
271   }
272   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
273   PetscFunctionReturn(0);
274 }
275 
276 /* ---------------------------------------------------------------------------------*/
277 
278 #undef __FUNCT__
279 #define __FUNCT__ "DMCreateColoring_DA_3d_MPIAIJ"
280 PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
281 {
282   PetscErrorCode    ierr;
283   PetscInt          xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
284   PetscInt          ncolors;
285   MPI_Comm          comm;
286   DMDABoundaryType  bx,by,bz;
287   DMDAStencilType   st;
288   ISColoringValue   *colors;
289   DM_DA             *dd = (DM_DA*)da->data;
290 
291   PetscFunctionBegin;
292   /*
293          nc - number of components per grid point
294          col - number of colors needed in one direction for single component problem
295 
296   */
297   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
298   col    = 2*s + 1;
299   if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)) SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
300                                                          by 2*stencil_width + 1\n");
301   if (by == DMDA_BOUNDARY_PERIODIC && (n % col)) SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
302                                                          by 2*stencil_width + 1\n");
303   if (bz == DMDA_BOUNDARY_PERIODIC && (p % col)) SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
304                                                          by 2*stencil_width + 1\n");
305 
306   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
307   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
308   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
309 
310   /* create the coloring */
311   if (ctype == IS_COLORING_GLOBAL) {
312     if (!dd->localcoloring) {
313       ierr = PetscMalloc(nc*nx*ny*nz*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
314       ii = 0;
315       for (k=zs; k<zs+nz; k++) {
316         for (j=ys; j<ys+ny; j++) {
317           for (i=xs; i<xs+nx; i++) {
318             for (l=0; l<nc; l++) {
319               colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
320             }
321           }
322         }
323       }
324       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
325       ierr = ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,&dd->localcoloring);CHKERRQ(ierr);
326     }
327     *coloring = dd->localcoloring;
328   } else if (ctype == IS_COLORING_GHOSTED) {
329     if (!dd->ghostedcoloring) {
330       ierr = PetscMalloc(nc*gnx*gny*gnz*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
331       ii = 0;
332       for (k=gzs; k<gzs+gnz; k++) {
333         for (j=gys; j<gys+gny; j++) {
334           for (i=gxs; i<gxs+gnx; i++) {
335             for (l=0; l<nc; l++) {
336               /* the complicated stuff is to handle periodic boundaries */
337               colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
338             }
339           }
340         }
341       }
342       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
343       ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
344       ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
345     }
346     *coloring = dd->ghostedcoloring;
347   } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
348   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
349   PetscFunctionReturn(0);
350 }
351 
352 /* ---------------------------------------------------------------------------------*/
353 
354 #undef __FUNCT__
355 #define __FUNCT__ "DMCreateColoring_DA_1d_MPIAIJ"
356 PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
357 {
358   PetscErrorCode    ierr;
359   PetscInt          xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
360   PetscInt          ncolors;
361   MPI_Comm          comm;
362   DMDABoundaryType  bx;
363   ISColoringValue   *colors;
364   DM_DA             *dd = (DM_DA*)da->data;
365 
366   PetscFunctionBegin;
367   /*
368          nc - number of components per grid point
369          col - number of colors needed in one direction for single component problem
370 
371   */
372   ierr = DMDAGetInfo(da,&dim,&m,0,0,&M,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
373   col    = 2*s + 1;
374 
375   if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)) SETERRQ2(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points %d is divisible\n\
376                                                           by 2*stencil_width + 1 %d\n",(int)m,(int)col);
377 
378   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
379   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
380   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
381 
382   /* create the coloring */
383   if (ctype == IS_COLORING_GLOBAL) {
384     if (!dd->localcoloring) {
385       ierr = PetscMalloc(nc*nx*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
386       if (dd->ofillcols) {
387         PetscInt tc = 0;
388         for (i=0; i<nc; i++) tc += (PetscInt) (dd->ofillcols[i] > 0);
389         i1 = 0;
390         for (i=xs; i<xs+nx; i++) {
391           for (l=0; l<nc; l++) {
392             if (dd->ofillcols[l] && (i % col)) {
393               colors[i1++] =  nc - 1 + tc*((i % col) - 1) + dd->ofillcols[l];
394             } else {
395               colors[i1++] = l;
396             }
397           }
398         }
399         ncolors = nc + 2*s*tc;
400       } else {
401         i1 = 0;
402         for (i=xs; i<xs+nx; i++) {
403           for (l=0; l<nc; l++) {
404             colors[i1++] = l + nc*(i % col);
405           }
406         }
407         ncolors = nc + nc*(col-1);
408       }
409       ierr = ISColoringCreate(comm,ncolors,nc*nx,colors,&dd->localcoloring);CHKERRQ(ierr);
410     }
411     *coloring = dd->localcoloring;
412   } else if (ctype == IS_COLORING_GHOSTED) {
413     if (!dd->ghostedcoloring) {
414       ierr = PetscMalloc(nc*gnx*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
415       i1 = 0;
416       for (i=gxs; i<gxs+gnx; i++) {
417         for (l=0; l<nc; l++) {
418           /* the complicated stuff is to handle periodic boundaries */
419           colors[i1++] = l + nc*(SetInRange(i,m) % col);
420         }
421       }
422       ncolors = nc + nc*(col-1);
423       ierr = ISColoringCreate(comm,ncolors,nc*gnx,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
424       ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
425     }
426     *coloring = dd->ghostedcoloring;
427   } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
428   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
429   PetscFunctionReturn(0);
430 }
431 
432 #undef __FUNCT__
433 #define __FUNCT__ "DMCreateColoring_DA_2d_5pt_MPIAIJ"
434 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
435 {
436   PetscErrorCode    ierr;
437   PetscInt          xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
438   PetscInt          ncolors;
439   MPI_Comm          comm;
440   DMDABoundaryType  bx,by;
441   ISColoringValue   *colors;
442   DM_DA             *dd = (DM_DA*)da->data;
443 
444   PetscFunctionBegin;
445   /*
446          nc - number of components per grid point
447          col - number of colors needed in one direction for single component problem
448 
449   */
450   ierr   = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,0);CHKERRQ(ierr);
451   ierr   = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
452   ierr   = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
453   ierr   = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
454 
455   if (bx == DMDA_BOUNDARY_PERIODIC && (m % 5)) SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible by 5\n");
456   if (by == DMDA_BOUNDARY_PERIODIC && (n % 5)) SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible by 5\n");
457 
458   /* create the coloring */
459   if (ctype == IS_COLORING_GLOBAL) {
460     if (!dd->localcoloring) {
461       ierr = PetscMalloc(nc*nx*ny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
462       ii = 0;
463       for (j=ys; j<ys+ny; j++) {
464 	for (i=xs; i<xs+nx; i++) {
465 	  for (k=0; k<nc; k++) {
466 	    colors[ii++] = k + nc*((3*j+i) % 5);
467 	  }
468 	}
469       }
470       ncolors = 5*nc;
471       ierr = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,&dd->localcoloring);CHKERRQ(ierr);
472     }
473     *coloring = dd->localcoloring;
474   } else if (ctype == IS_COLORING_GHOSTED) {
475     if (!dd->ghostedcoloring) {
476       ierr = PetscMalloc(nc*gnx*gny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
477       ii = 0;
478       for (j=gys; j<gys+gny; j++) {
479 	for (i=gxs; i<gxs+gnx; i++) {
480 	  for (k=0; k<nc; k++) {
481 	    colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
482 	  }
483 	}
484       }
485       ncolors = 5*nc;
486       ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
487       ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
488     }
489     *coloring = dd->ghostedcoloring;
490   } else SETERRQ1(((PetscObject)da)->comm,PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
491   PetscFunctionReturn(0);
492 }
493 
494 /* =========================================================================== */
495 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat);
496 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM,Mat);
497 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat);
498 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat);
499 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat);
500 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat);
501 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat);
502 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat);
503 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat);
504 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat);
505 
506 #undef __FUNCT__
507 #define __FUNCT__ "MatSetupDM"
508 /*@C
509    MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix
510 
511    Logically Collective on Mat
512 
513    Input Parameters:
514 +  mat - the matrix
515 -  da - the da
516 
517    Level: intermediate
518 
519 @*/
520 PetscErrorCode MatSetupDM(Mat mat,DM da)
521 {
522   PetscErrorCode ierr;
523 
524   PetscFunctionBegin;
525   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
526   PetscValidHeaderSpecific(da,DM_CLASSID,1);
527   ierr = PetscTryMethod(mat,"MatSetupDM_C",(Mat,DM),(mat,da));CHKERRQ(ierr);
528   PetscFunctionReturn(0);
529 }
530 
531 EXTERN_C_BEGIN
532 #undef __FUNCT__
533 #define __FUNCT__ "MatView_MPI_DA"
534 PetscErrorCode  MatView_MPI_DA(Mat A,PetscViewer viewer)
535 {
536   DM             da;
537   PetscErrorCode ierr;
538   const char     *prefix;
539   Mat            Anatural;
540   AO             ao;
541   PetscInt       rstart,rend,*petsc,i;
542   IS             is;
543   MPI_Comm       comm;
544   PetscViewerFormat format;
545 
546   PetscFunctionBegin;
547   /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
548   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
549   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0);
550 
551   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
552   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
553   if (!da) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
554 
555   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
556   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
557   ierr = PetscMalloc((rend-rstart)*sizeof(PetscInt),&petsc);CHKERRQ(ierr);
558   for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
559   ierr = AOApplicationToPetsc(ao,rend-rstart,petsc);CHKERRQ(ierr);
560   ierr = ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
561 
562   /* call viewer on natural ordering */
563   ierr = MatGetSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);CHKERRQ(ierr);
564   ierr = ISDestroy(&is);CHKERRQ(ierr);
565   ierr = PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);CHKERRQ(ierr);
566   ierr = PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);CHKERRQ(ierr);
567   ierr = PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);CHKERRQ(ierr);
568   ierr = MatView(Anatural,viewer);CHKERRQ(ierr);
569   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
570   PetscFunctionReturn(0);
571 }
572 EXTERN_C_END
573 
574 EXTERN_C_BEGIN
575 #undef __FUNCT__
576 #define __FUNCT__ "MatLoad_MPI_DA"
577 PetscErrorCode  MatLoad_MPI_DA(Mat A,PetscViewer viewer)
578 {
579   DM             da;
580   PetscErrorCode ierr;
581   Mat            Anatural,Aapp;
582   AO             ao;
583   PetscInt       rstart,rend,*app,i;
584   IS             is;
585   MPI_Comm       comm;
586 
587   PetscFunctionBegin;
588   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
589   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
590   if (!da) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
591 
592   /* Load the matrix in natural ordering */
593   ierr = MatCreate(((PetscObject)A)->comm,&Anatural);CHKERRQ(ierr);
594   ierr = MatSetType(Anatural,((PetscObject)A)->type_name);CHKERRQ(ierr);
595   ierr = MatSetSizes(Anatural,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
596   ierr = MatLoad(Anatural,viewer);CHKERRQ(ierr);
597 
598   /* Map natural ordering to application ordering and create IS */
599   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
600   ierr = MatGetOwnershipRange(Anatural,&rstart,&rend);CHKERRQ(ierr);
601   ierr = PetscMalloc((rend-rstart)*sizeof(PetscInt),&app);CHKERRQ(ierr);
602   for (i=rstart; i<rend; i++) app[i-rstart] = i;
603   ierr = AOPetscToApplication(ao,rend-rstart,app);CHKERRQ(ierr);
604   ierr = ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
605 
606   /* Do permutation and replace header */
607   ierr = MatGetSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);CHKERRQ(ierr);
608   ierr = MatHeaderReplace(A,Aapp);CHKERRQ(ierr);
609   ierr = ISDestroy(&is);CHKERRQ(ierr);
610   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
611   PetscFunctionReturn(0);
612 }
613 EXTERN_C_END
614 
615 #undef __FUNCT__
616 #define __FUNCT__ "DMCreateMatrix_DA"
617 PetscErrorCode DMCreateMatrix_DA(DM da, MatType mtype,Mat *J)
618 {
619   PetscErrorCode ierr;
620   PetscInt       dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
621   Mat            A;
622   MPI_Comm       comm;
623   MatType        Atype;
624   PetscSection   section, sectionGlobal;
625   void           (*aij)(void)=PETSC_NULL,(*baij)(void)=PETSC_NULL,(*sbaij)(void)=PETSC_NULL;
626   MatType        ttype[256];
627   PetscBool      flg;
628   PetscMPIInt    size;
629   DM_DA          *dd = (DM_DA*)da->data;
630 
631   PetscFunctionBegin;
632 #ifndef PETSC_USE_DYNAMIC_LIBRARIES
633   ierr = MatInitializePackage(PETSC_NULL);CHKERRQ(ierr);
634 #endif
635   if (!mtype) mtype = MATAIJ;
636   ierr = PetscStrcpy((char*)ttype,mtype);CHKERRQ(ierr);
637   ierr = PetscOptionsBegin(((PetscObject)da)->comm,((PetscObject)da)->prefix,"DMDA options","Mat");CHKERRQ(ierr);
638   ierr = PetscOptionsList("-dm_mat_type","Matrix type","MatSetType",MatList,mtype,(char*)ttype,256,&flg);CHKERRQ(ierr);
639   ierr = PetscOptionsEnd();
640 
641   ierr = DMGetDefaultSection(da, &section);CHKERRQ(ierr);
642   if (section) {
643     PetscInt  bs = -1;
644     PetscInt  localSize;
645     PetscBool isShell, isBlock, isSeqBlock, isMPIBlock, isSymBlock, isSymSeqBlock, isSymMPIBlock, isSymmetric;
646 
647     ierr = DMGetDefaultGlobalSection(da, &sectionGlobal);CHKERRQ(ierr);
648     ierr = PetscSectionGetConstrainedStorageSize(sectionGlobal, &localSize);CHKERRQ(ierr);
649     ierr = MatCreate(((PetscObject) da)->comm, J);CHKERRQ(ierr);
650     ierr = MatSetSizes(*J, localSize, localSize, PETSC_DETERMINE, PETSC_DETERMINE);CHKERRQ(ierr);
651     ierr = MatSetType(*J, mtype);CHKERRQ(ierr);
652     ierr = MatSetFromOptions(*J);CHKERRQ(ierr);
653     ierr = PetscStrcmp(mtype, MATSHELL, &isShell);CHKERRQ(ierr);
654     ierr = PetscStrcmp(mtype, MATBAIJ, &isBlock);CHKERRQ(ierr);
655     ierr = PetscStrcmp(mtype, MATSEQBAIJ, &isSeqBlock);CHKERRQ(ierr);
656     ierr = PetscStrcmp(mtype, MATMPIBAIJ, &isMPIBlock);CHKERRQ(ierr);
657     ierr = PetscStrcmp(mtype, MATSBAIJ, &isSymBlock);CHKERRQ(ierr);
658     ierr = PetscStrcmp(mtype, MATSEQSBAIJ, &isSymSeqBlock);CHKERRQ(ierr);
659     ierr = PetscStrcmp(mtype, MATMPISBAIJ, &isSymMPIBlock);CHKERRQ(ierr);
660     /* Check for symmetric storage */
661     isSymmetric = (PetscBool) (isSymBlock || isSymSeqBlock || isSymMPIBlock);
662     if (isSymmetric) {
663       ierr = MatSetOption(*J, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE);CHKERRQ(ierr);
664     }
665     if (!isShell) {
666       /* PetscBool fillMatrix = (PetscBool) !da->prealloc_only; */
667       PetscInt *dnz, *onz, *dnzu, *onzu, bsLocal;
668 
669       if (bs < 0) {
670         if (isBlock || isSeqBlock || isMPIBlock || isSymBlock || isSymSeqBlock || isSymMPIBlock) {
671           PetscInt pStart, pEnd, p, dof;
672 
673           ierr = PetscSectionGetChart(sectionGlobal, &pStart, &pEnd);CHKERRQ(ierr);
674           for (p = pStart; p < pEnd; ++p) {
675             ierr = PetscSectionGetDof(sectionGlobal, p, &dof);CHKERRQ(ierr);
676             if (dof) {
677               bs = dof;
678               break;
679             }
680           }
681         } else {
682           bs = 1;
683         }
684         /* Must have same blocksize on all procs (some might have no points) */
685         bsLocal = bs;
686         ierr = MPI_Allreduce(&bsLocal, &bs, 1, MPIU_INT, MPI_MAX, ((PetscObject) da)->comm);CHKERRQ(ierr);
687       }
688       ierr = PetscMalloc4(localSize/bs, PetscInt, &dnz, localSize/bs, PetscInt, &onz, localSize/bs, PetscInt, &dnzu, localSize/bs, PetscInt, &onzu);CHKERRQ(ierr);
689       ierr = PetscMemzero(dnz,  localSize/bs * sizeof(PetscInt));CHKERRQ(ierr);
690       ierr = PetscMemzero(onz,  localSize/bs * sizeof(PetscInt));CHKERRQ(ierr);
691       ierr = PetscMemzero(dnzu, localSize/bs * sizeof(PetscInt));CHKERRQ(ierr);
692       ierr = PetscMemzero(onzu, localSize/bs * sizeof(PetscInt));CHKERRQ(ierr);
693       /* ierr = DMComplexPreallocateOperator(dm, bs, section, sectionGlobal, dnz, onz, dnzu, onzu, *J, fillMatrix);CHKERRQ(ierr); */
694       ierr = PetscFree4(dnz, onz, dnzu, onzu);CHKERRQ(ierr);
695     }
696   }
697   /*
698                                   m
699           ------------------------------------------------------
700          |                                                     |
701          |                                                     |
702          |               ----------------------                |
703          |               |                    |                |
704       n  |           ny  |                    |                |
705          |               |                    |                |
706          |               .---------------------                |
707          |             (xs,ys)     nx                          |
708          |            .                                        |
709          |         (gxs,gys)                                   |
710          |                                                     |
711           -----------------------------------------------------
712   */
713 
714   /*
715          nc - number of components per grid point
716          col - number of colors needed in one direction for single component problem
717 
718   */
719   M = dd->M;
720   N = dd->N;
721   P = dd->P;
722   dim = dd->dim;
723   dof = dd->w;
724   /* ierr = DMDAGetInfo(da,&dim,&M,&N,&P,0,0,0,&dof,0,0,0,0,0);CHKERRQ(ierr); */
725   ierr = DMDAGetCorners(da,0,0,0,&nx,&ny,&nz);CHKERRQ(ierr);
726   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
727   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
728   ierr = MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);CHKERRQ(ierr);
729   ierr = MatSetType(A,(MatType)ttype);CHKERRQ(ierr);
730   ierr = MatSetDM(A,da);CHKERRQ(ierr);
731   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
732   ierr = MatGetType(A,&Atype);CHKERRQ(ierr);
733   /*
734      We do not provide a getmatrix function in the DMDA operations because
735    the basic DMDA does not know about matrices. We think of DMDA as being more
736    more low-level than matrices. This is kind of cheating but, cause sometimes
737    we think of DMDA has higher level than matrices.
738 
739      We could switch based on Atype (or mtype), but we do not since the
740    specialized setting routines depend only the particular preallocation
741    details of the matrix, not the type itself.
742   */
743   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
744   if (!aij) {
745     ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
746   }
747   if (!aij) {
748     ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
749     if (!baij) {
750       ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
751     }
752     if (!baij){
753       ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
754       if (!sbaij) {
755         ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
756       }
757     }
758   }
759   if (aij) {
760     if (dim == 1) {
761       if (dd->ofill) {
762         ierr = DMCreateMatrix_DA_1d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
763       } else {
764         ierr = DMCreateMatrix_DA_1d_MPIAIJ(da,A);CHKERRQ(ierr);
765       }
766     } else if (dim == 2) {
767       if (dd->ofill) {
768         ierr = DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
769       } else {
770         ierr = DMCreateMatrix_DA_2d_MPIAIJ(da,A);CHKERRQ(ierr);
771       }
772     } else if (dim == 3) {
773       if (dd->ofill) {
774         ierr = DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
775       } else {
776         ierr = DMCreateMatrix_DA_3d_MPIAIJ(da,A);CHKERRQ(ierr);
777       }
778     }
779   } else if (baij) {
780     if (dim == 2) {
781       ierr = DMCreateMatrix_DA_2d_MPIBAIJ(da,A);CHKERRQ(ierr);
782     } else if (dim == 3) {
783       ierr = DMCreateMatrix_DA_3d_MPIBAIJ(da,A);CHKERRQ(ierr);
784     } else  SETERRQ3(((PetscObject)da)->comm,PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
785   } else if (sbaij) {
786     if (dim == 2) {
787       ierr = DMCreateMatrix_DA_2d_MPISBAIJ(da,A);CHKERRQ(ierr);
788     } else if (dim == 3) {
789       ierr = DMCreateMatrix_DA_3d_MPISBAIJ(da,A);CHKERRQ(ierr);
790     } else SETERRQ3(((PetscObject)da)->comm,PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
791   } else {
792     ISLocalToGlobalMapping ltog,ltogb;
793     ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
794     ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
795     ierr = MatSetUp(A);CHKERRQ(ierr);
796     ierr = MatSetLocalToGlobalMapping(A,ltog,ltog);CHKERRQ(ierr);
797     ierr = MatSetLocalToGlobalMappingBlock(A,ltogb,ltogb);CHKERRQ(ierr);
798   }
799   ierr = DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);CHKERRQ(ierr);
800   ierr = MatSetStencil(A,dim,dims,starts,dof);CHKERRQ(ierr);
801   ierr = MatSetDM(A,da);CHKERRQ(ierr);
802   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
803   if (size > 1) {
804     /* change viewer to display matrix in natural ordering */
805     ierr = MatShellSetOperation(A, MATOP_VIEW, (void (*)(void)) MatView_MPI_DA);CHKERRQ(ierr);
806     ierr = MatShellSetOperation(A, MATOP_LOAD, (void (*)(void)) MatLoad_MPI_DA);CHKERRQ(ierr);
807   }
808   *J = A;
809   PetscFunctionReturn(0);
810 }
811 
812 /* ---------------------------------------------------------------------------------*/
813 #undef __FUNCT__
814 #define __FUNCT__ "DMCreateMatrix_DA_2d_MPIAIJ"
815 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J)
816 {
817   PetscErrorCode         ierr;
818   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = PETSC_NULL,k,nc,*rows = PETSC_NULL,col,cnt,l,p;
819   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
820   MPI_Comm               comm;
821   PetscScalar            *values;
822   DMDABoundaryType       bx,by;
823   ISLocalToGlobalMapping ltog,ltogb;
824   DMDAStencilType        st;
825 
826   PetscFunctionBegin;
827   /*
828          nc - number of components per grid point
829          col - number of colors needed in one direction for single component problem
830 
831   */
832   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
833   col = 2*s + 1;
834   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
835   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
836   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
837 
838   ierr = PetscMalloc2(nc,PetscInt,&rows,col*col*nc*nc,PetscInt,&cols);CHKERRQ(ierr);
839   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
840   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
841 
842   /* determine the matrix preallocation information */
843   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
844   for (i=xs; i<xs+nx; i++) {
845 
846     pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
847     pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
848 
849     for (j=ys; j<ys+ny; j++) {
850       slot = i - gxs + gnx*(j - gys);
851 
852       lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
853       lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
854 
855       cnt  = 0;
856       for (k=0; k<nc; k++) {
857 	for (l=lstart; l<lend+1; l++) {
858 	  for (p=pstart; p<pend+1; p++) {
859 	    if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
860 	      cols[cnt++]  = k + nc*(slot + gnx*l + p);
861 	    }
862 	  }
863 	}
864 	rows[k] = k + nc*(slot);
865       }
866       ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
867     }
868   }
869   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
870   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
871   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
872   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
873 
874   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
875   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
876 
877   /*
878     For each node in the grid: we get the neighbors in the local (on processor ordering
879     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
880     PETSc ordering.
881   */
882   if (!da->prealloc_only) {
883     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
884     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
885     for (i=xs; i<xs+nx; i++) {
886 
887       pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
888       pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
889 
890       for (j=ys; j<ys+ny; j++) {
891 	slot = i - gxs + gnx*(j - gys);
892 
893 	lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
894 	lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
895 
896 	cnt  = 0;
897 	for (k=0; k<nc; k++) {
898 	  for (l=lstart; l<lend+1; l++) {
899 	    for (p=pstart; p<pend+1; p++) {
900 	      if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
901 		cols[cnt++]  = k + nc*(slot + gnx*l + p);
902 	      }
903 	    }
904 	  }
905 	  rows[k]      = k + nc*(slot);
906 	}
907 	ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
908       }
909     }
910     ierr = PetscFree(values);CHKERRQ(ierr);
911     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
912     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
913   }
914   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
915   PetscFunctionReturn(0);
916 }
917 
918 #undef __FUNCT__
919 #define __FUNCT__ "DMCreateMatrix_DA_2d_MPIAIJ_Fill"
920 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)
921 {
922   PetscErrorCode         ierr;
923   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
924   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,l,p;
925   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
926   DM_DA                  *dd = (DM_DA*)da->data;
927   PetscInt               ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
928   MPI_Comm               comm;
929   PetscScalar            *values;
930   DMDABoundaryType       bx,by;
931   ISLocalToGlobalMapping ltog,ltogb;
932   DMDAStencilType        st;
933 
934   PetscFunctionBegin;
935   /*
936          nc - number of components per grid point
937          col - number of colors needed in one direction for single component problem
938 
939   */
940   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
941   col = 2*s + 1;
942   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
943   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
944   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
945 
946   ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
947   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
948   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
949 
950   /* determine the matrix preallocation information */
951   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
952   for (i=xs; i<xs+nx; i++) {
953 
954     pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
955     pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
956 
957     for (j=ys; j<ys+ny; j++) {
958       slot = i - gxs + gnx*(j - gys);
959 
960       lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
961       lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
962 
963       for (k=0; k<nc; k++) {
964         cnt  = 0;
965 	for (l=lstart; l<lend+1; l++) {
966 	  for (p=pstart; p<pend+1; p++) {
967             if (l || p) {
968 	      if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
969                 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++)
970 		  cols[cnt++]  = ofill[ifill_col] + nc*(slot + gnx*l + p);
971 	      }
972             } else {
973 	      if (dfill) {
974 		for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++)
975 		  cols[cnt++]  = dfill[ifill_col] + nc*(slot + gnx*l + p);
976 	      } else {
977 		for (ifill_col=0; ifill_col<nc; ifill_col++)
978 		  cols[cnt++]  = ifill_col + nc*(slot + gnx*l + p);
979 	      }
980             }
981 	  }
982 	}
983 	row = k + nc*(slot);
984         ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
985       }
986     }
987   }
988   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
989   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
990   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
991   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
992   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
993 
994   /*
995     For each node in the grid: we get the neighbors in the local (on processor ordering
996     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
997     PETSc ordering.
998   */
999   if (!da->prealloc_only) {
1000     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1001     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1002     for (i=xs; i<xs+nx; i++) {
1003 
1004       pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1005       pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1006 
1007       for (j=ys; j<ys+ny; j++) {
1008 	slot = i - gxs + gnx*(j - gys);
1009 
1010 	lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1011 	lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1012 
1013 	for (k=0; k<nc; k++) {
1014 	  cnt  = 0;
1015 	  for (l=lstart; l<lend+1; l++) {
1016 	    for (p=pstart; p<pend+1; p++) {
1017 	      if (l || p) {
1018 		if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
1019 		  for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++)
1020 		    cols[cnt++]  = ofill[ifill_col] + nc*(slot + gnx*l + p);
1021 		}
1022 	      } else {
1023 		if (dfill) {
1024 		  for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++)
1025 		    cols[cnt++]  = dfill[ifill_col] + nc*(slot + gnx*l + p);
1026 		} else {
1027 		  for (ifill_col=0; ifill_col<nc; ifill_col++)
1028 		    cols[cnt++]  = ifill_col + nc*(slot + gnx*l + p);
1029 		}
1030 	      }
1031 	    }
1032 	  }
1033 	  row  = k + nc*(slot);
1034 	  ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1035 	}
1036       }
1037     }
1038     ierr = PetscFree(values);CHKERRQ(ierr);
1039     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1040     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1041   }
1042   ierr = PetscFree(cols);CHKERRQ(ierr);
1043   PetscFunctionReturn(0);
1044 }
1045 
1046 /* ---------------------------------------------------------------------------------*/
1047 
1048 #undef __FUNCT__
1049 #define __FUNCT__ "DMCreateMatrix_DA_3d_MPIAIJ"
1050 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J)
1051 {
1052   PetscErrorCode         ierr;
1053   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1054   PetscInt               m,n,dim,s,*cols = PETSC_NULL,k,nc,*rows = PETSC_NULL,col,cnt,l,p,*dnz = PETSC_NULL,*onz = PETSC_NULL;
1055   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1056   MPI_Comm               comm;
1057   PetscScalar            *values;
1058   DMDABoundaryType       bx,by,bz;
1059   ISLocalToGlobalMapping ltog,ltogb;
1060   DMDAStencilType        st;
1061 
1062   PetscFunctionBegin;
1063   /*
1064          nc - number of components per grid point
1065          col - number of colors needed in one direction for single component problem
1066 
1067   */
1068   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1069   col    = 2*s + 1;
1070 
1071   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1072   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1073   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1074 
1075   ierr = PetscMalloc2(nc,PetscInt,&rows,col*col*col*nc*nc,PetscInt,&cols);CHKERRQ(ierr);
1076   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1077   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1078 
1079   /* determine the matrix preallocation information */
1080   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1081   for (i=xs; i<xs+nx; i++) {
1082     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1083     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1084     for (j=ys; j<ys+ny; j++) {
1085       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1086       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1087       for (k=zs; k<zs+nz; k++) {
1088 	kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1089 	kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1090 
1091 	slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1092 
1093 	cnt  = 0;
1094 	for (l=0; l<nc; l++) {
1095 	  for (ii=istart; ii<iend+1; ii++) {
1096 	    for (jj=jstart; jj<jend+1; jj++) {
1097 	      for (kk=kstart; kk<kend+1; kk++) {
1098 		if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1099 		  cols[cnt++]  = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1100 		}
1101 	      }
1102 	    }
1103 	  }
1104 	  rows[l] = l + nc*(slot);
1105 	}
1106 	ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1107       }
1108     }
1109   }
1110   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1111   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1112   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1113   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1114   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1115   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1116 
1117   /*
1118     For each node in the grid: we get the neighbors in the local (on processor ordering
1119     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1120     PETSc ordering.
1121   */
1122   if (!da->prealloc_only) {
1123     ierr = PetscMalloc(col*col*col*nc*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1124     ierr = PetscMemzero(values,col*col*col*nc*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1125     for (i=xs; i<xs+nx; i++) {
1126       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1127       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1128       for (j=ys; j<ys+ny; j++) {
1129 	jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1130 	jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1131 	for (k=zs; k<zs+nz; k++) {
1132 	  kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1133 	  kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1134 
1135 	  slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1136 
1137 	  cnt  = 0;
1138 	  for (l=0; l<nc; l++) {
1139 	    for (ii=istart; ii<iend+1; ii++) {
1140 	      for (jj=jstart; jj<jend+1; jj++) {
1141 		for (kk=kstart; kk<kend+1; kk++) {
1142 		  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1143 		    cols[cnt++]  = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1144 		  }
1145 		}
1146 	      }
1147 	    }
1148 	    rows[l]      = l + nc*(slot);
1149 	  }
1150 	  ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1151 	}
1152       }
1153     }
1154     ierr = PetscFree(values);CHKERRQ(ierr);
1155     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1156     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1157   }
1158   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1159   PetscFunctionReturn(0);
1160 }
1161 
1162 /* ---------------------------------------------------------------------------------*/
1163 
1164 #undef __FUNCT__
1165 #define __FUNCT__ "DMCreateMatrix_DA_1d_MPIAIJ_Fill"
1166 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J)
1167 {
1168   PetscErrorCode         ierr;
1169   DM_DA                  *dd = (DM_DA*)da->data;
1170   PetscInt               xs,nx,i,j,gxs,gnx,row,k,l;
1171   PetscInt               m,dim,s,*cols = PETSC_NULL,nc,col,cnt,*ocols;
1172   PetscInt               *ofill = dd->ofill;
1173   PetscScalar            *values;
1174   DMDABoundaryType       bx;
1175   ISLocalToGlobalMapping ltog,ltogb;
1176   PetscMPIInt            rank,size;
1177 
1178   PetscFunctionBegin;
1179   if (dd->bx == DMDA_BOUNDARY_PERIODIC) SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"With fill provided not implemented with periodic boundary conditions");
1180   ierr = MPI_Comm_rank(((PetscObject)da)->comm,&rank);CHKERRQ(ierr);
1181   ierr = MPI_Comm_size(((PetscObject)da)->comm,&size);CHKERRQ(ierr);
1182 
1183   /*
1184          nc - number of components per grid point
1185          col - number of colors needed in one direction for single component problem
1186 
1187   */
1188   ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
1189   col    = 2*s + 1;
1190 
1191   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
1192   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
1193 
1194   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1195   ierr = PetscMalloc2(nx*nc,PetscInt,&cols,nx*nc,PetscInt,&ocols);CHKERRQ(ierr);
1196   ierr = PetscMemzero(cols,nx*nc*sizeof(PetscInt));CHKERRQ(ierr);
1197   ierr = PetscMemzero(ocols,nx*nc*sizeof(PetscInt));CHKERRQ(ierr);
1198 
1199   /*
1200         note should be smaller for first and last process with no periodic
1201         does not handle dfill
1202   */
1203   cnt  = 0;
1204   /* coupling with process to the left */
1205   for (i=0; i<s; i++) {
1206     for (j=0; j<nc; j++){
1207       ocols[cnt] = ((!rank) ? 0 : (s - i)*(ofill[j+1] - ofill[j]));
1208       cols[cnt]  = nc + (s + i)*(ofill[j+1] - ofill[j]);
1209       cnt++;
1210     }
1211   }
1212   for (i=s; i<nx-s; i++) {
1213     for (j=0; j<nc; j++){
1214       cols[cnt]  = nc + 2*s*(ofill[j+1] - ofill[j]);
1215       cnt++;
1216     }
1217   }
1218  /* coupling with process to the right */
1219   for (i=nx-s; i<nx; i++){
1220     for (j=0; j<nc; j++){
1221       ocols[cnt] = ((rank == (size-1)) ? 0 : (i - nx + s + 1)*(ofill[j+1] - ofill[j]));
1222       cols[cnt]  = nc + (s + nx - i - 1)*(ofill[j+1] - ofill[j]);
1223       cnt++;
1224     }
1225   }
1226 
1227   ierr = MatSeqAIJSetPreallocation(J,0,cols);CHKERRQ(ierr);
1228   ierr = MatMPIAIJSetPreallocation(J,0,cols,0,ocols);CHKERRQ(ierr);
1229   ierr = PetscFree2(cols,ocols);CHKERRQ(ierr);
1230 
1231   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1232   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1233   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1234   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1235 
1236   /*
1237     For each node in the grid: we get the neighbors in the local (on processor ordering
1238     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1239     PETSc ordering.
1240   */
1241   if (!da->prealloc_only) {
1242     ierr = PetscMalloc(col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1243     ierr = PetscMalloc(col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1244     ierr = PetscMemzero(values,col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1245 
1246     row  = xs*nc;
1247     /* coupling with process to the left */
1248     for (i=xs; i<xs+s; i++) {
1249       for (j=0; j<nc; j++){
1250         cnt = 0;
1251         if (rank) {
1252           for (l=0; l<s; l++) {
1253             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1254           }
1255         }
1256         for (k=0; k<nc; k++) {
1257           cols[cnt++] = i*nc + k;
1258         }
1259         for (l=0; l<s; l++) {
1260           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1261         }
1262         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1263         row++;
1264       }
1265     }
1266     for (i=xs+s; i<xs+nx-s; i++) {
1267       for (j=0; j<nc; j++){
1268         cnt = 0;
1269         for (l=0; l<s; l++) {
1270           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1271         }
1272         for (k=0; k<nc; k++) {
1273           cols[cnt++] = i*nc + k;
1274         }
1275         for (l=0; l<s; l++) {
1276           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1277         }
1278         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1279         row++;
1280       }
1281     }
1282     /* coupling with process to the right */
1283     for (i=xs+nx-s; i<xs+nx; i++){
1284       for (j=0; j<nc; j++){
1285         cnt = 0;
1286         for (l=0; l<s; l++) {
1287           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1288         }
1289         for (k=0; k<nc; k++) {
1290           cols[cnt++] = i*nc + k;
1291         }
1292         if (rank < size-1) {
1293           for (l=0; l<s; l++) {
1294             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1295           }
1296         }
1297         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1298         row++;
1299       }
1300     }
1301     ierr = PetscFree(values);CHKERRQ(ierr);
1302     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1303     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1304     ierr = PetscFree(cols);CHKERRQ(ierr);
1305   }
1306   PetscFunctionReturn(0);
1307 }
1308 
1309 /* ---------------------------------------------------------------------------------*/
1310 
1311 #undef __FUNCT__
1312 #define __FUNCT__ "DMCreateMatrix_DA_1d_MPIAIJ"
1313 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J)
1314 {
1315   PetscErrorCode         ierr;
1316   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1317   PetscInt               m,dim,s,*cols = PETSC_NULL,nc,*rows = PETSC_NULL,col,cnt,l;
1318   PetscInt               istart,iend;
1319   PetscScalar            *values;
1320   DMDABoundaryType       bx;
1321   ISLocalToGlobalMapping ltog,ltogb;
1322 
1323   PetscFunctionBegin;
1324   /*
1325          nc - number of components per grid point
1326          col - number of colors needed in one direction for single component problem
1327 
1328   */
1329   ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
1330   col    = 2*s + 1;
1331 
1332   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
1333   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
1334 
1335   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1336   ierr = MatSeqAIJSetPreallocation(J,col*nc,0);CHKERRQ(ierr);
1337   ierr = MatMPIAIJSetPreallocation(J,col*nc,0,col*nc,0);CHKERRQ(ierr);
1338 
1339   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1340   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1341   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1342   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1343 
1344   /*
1345     For each node in the grid: we get the neighbors in the local (on processor ordering
1346     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1347     PETSc ordering.
1348   */
1349   if (!da->prealloc_only) {
1350     ierr = PetscMalloc2(nc,PetscInt,&rows,col*nc*nc,PetscInt,&cols);CHKERRQ(ierr);
1351     ierr = PetscMalloc(col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1352     ierr = PetscMemzero(values,col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1353     for (i=xs; i<xs+nx; i++) {
1354       istart = PetscMax(-s,gxs - i);
1355       iend   = PetscMin(s,gxs + gnx - i - 1);
1356       slot   = i - gxs;
1357 
1358       cnt  = 0;
1359       for (l=0; l<nc; l++) {
1360 	for (i1=istart; i1<iend+1; i1++) {
1361 	  cols[cnt++] = l + nc*(slot + i1);
1362 	}
1363 	rows[l]      = l + nc*(slot);
1364       }
1365       ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1366     }
1367     ierr = PetscFree(values);CHKERRQ(ierr);
1368     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1369     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1370     ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1371   }
1372   PetscFunctionReturn(0);
1373 }
1374 
1375 #undef __FUNCT__
1376 #define __FUNCT__ "DMCreateMatrix_DA_2d_MPIBAIJ"
1377 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
1378 {
1379   PetscErrorCode         ierr;
1380   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1381   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1382   PetscInt               istart,iend,jstart,jend,ii,jj;
1383   MPI_Comm               comm;
1384   PetscScalar            *values;
1385   DMDABoundaryType       bx,by;
1386   DMDAStencilType        st;
1387   ISLocalToGlobalMapping ltog,ltogb;
1388 
1389   PetscFunctionBegin;
1390   /*
1391      nc - number of components per grid point
1392      col - number of colors needed in one direction for single component problem
1393   */
1394   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1395   col = 2*s + 1;
1396 
1397   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1398   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1399   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1400 
1401   ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1402 
1403   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1404   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1405 
1406   /* determine the matrix preallocation information */
1407   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1408   for (i=xs; i<xs+nx; i++) {
1409     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1410     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1411     for (j=ys; j<ys+ny; j++) {
1412       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1413       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1414       slot = i - gxs + gnx*(j - gys);
1415 
1416       /* Find block columns in block row */
1417       cnt  = 0;
1418       for (ii=istart; ii<iend+1; ii++) {
1419         for (jj=jstart; jj<jend+1; jj++) {
1420           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1421             cols[cnt++]  = slot + ii + gnx*jj;
1422           }
1423         }
1424       }
1425       ierr = MatPreallocateSetLocal(ltogb,1,&slot,ltogb,cnt,cols,dnz,onz);CHKERRQ(ierr);
1426     }
1427   }
1428   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1429   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1430   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1431 
1432   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1433   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1434 
1435   /*
1436     For each node in the grid: we get the neighbors in the local (on processor ordering
1437     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1438     PETSc ordering.
1439   */
1440   if (!da->prealloc_only) {
1441     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1442     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1443     for (i=xs; i<xs+nx; i++) {
1444       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1445       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1446       for (j=ys; j<ys+ny; j++) {
1447 	jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1448 	jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1449 	slot = i - gxs + gnx*(j - gys);
1450 	cnt  = 0;
1451         for (ii=istart; ii<iend+1; ii++) {
1452           for (jj=jstart; jj<jend+1; jj++) {
1453             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1454               cols[cnt++]  = slot + ii + gnx*jj;
1455             }
1456           }
1457         }
1458 	ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1459       }
1460     }
1461     ierr = PetscFree(values);CHKERRQ(ierr);
1462     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1463     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1464   }
1465   ierr = PetscFree(cols);CHKERRQ(ierr);
1466   PetscFunctionReturn(0);
1467 }
1468 
1469 #undef __FUNCT__
1470 #define __FUNCT__ "DMCreateMatrix_DA_3d_MPIBAIJ"
1471 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
1472 {
1473   PetscErrorCode         ierr;
1474   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1475   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1476   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1477   MPI_Comm               comm;
1478   PetscScalar            *values;
1479   DMDABoundaryType       bx,by,bz;
1480   DMDAStencilType        st;
1481   ISLocalToGlobalMapping ltog,ltogb;
1482 
1483   PetscFunctionBegin;
1484   /*
1485          nc - number of components per grid point
1486          col - number of colors needed in one direction for single component problem
1487 
1488   */
1489   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1490   col    = 2*s + 1;
1491 
1492   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1493   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1494   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1495 
1496   ierr  = PetscMalloc(col*col*col*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1497 
1498   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1499   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1500 
1501   /* determine the matrix preallocation information */
1502   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1503   for (i=xs; i<xs+nx; i++) {
1504     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1505     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1506     for (j=ys; j<ys+ny; j++) {
1507       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1508       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1509       for (k=zs; k<zs+nz; k++) {
1510 	kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1511 	kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1512 
1513 	slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1514 
1515 	/* Find block columns in block row */
1516 	cnt  = 0;
1517         for (ii=istart; ii<iend+1; ii++) {
1518           for (jj=jstart; jj<jend+1; jj++) {
1519             for (kk=kstart; kk<kend+1; kk++) {
1520               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1521 		cols[cnt++]  = slot + ii + gnx*jj + gnx*gny*kk;
1522 	      }
1523 	    }
1524 	  }
1525 	}
1526 	ierr = MatPreallocateSetLocal(ltogb,1,&slot,ltogb,cnt,cols,dnz,onz);CHKERRQ(ierr);
1527       }
1528     }
1529   }
1530   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1531   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1532   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1533 
1534   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1535   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1536 
1537   /*
1538     For each node in the grid: we get the neighbors in the local (on processor ordering
1539     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1540     PETSc ordering.
1541   */
1542   if (!da->prealloc_only) {
1543     ierr  = PetscMalloc(col*col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1544     ierr  = PetscMemzero(values,col*col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1545     for (i=xs; i<xs+nx; i++) {
1546       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1547       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1548       for (j=ys; j<ys+ny; j++) {
1549 	jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1550 	jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1551 	for (k=zs; k<zs+nz; k++) {
1552 	  kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1553 	  kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1554 
1555 	  slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1556 
1557 	  cnt  = 0;
1558           for (ii=istart; ii<iend+1; ii++) {
1559             for (jj=jstart; jj<jend+1; jj++) {
1560               for (kk=kstart; kk<kend+1; kk++) {
1561                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1562                   cols[cnt++]  = slot + ii + gnx*jj + gnx*gny*kk;
1563                 }
1564               }
1565             }
1566           }
1567 	  ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1568 	}
1569       }
1570     }
1571     ierr = PetscFree(values);CHKERRQ(ierr);
1572     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1573     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1574   }
1575   ierr = PetscFree(cols);CHKERRQ(ierr);
1576   PetscFunctionReturn(0);
1577 }
1578 
1579 #undef __FUNCT__
1580 #define __FUNCT__ "L2GFilterUpperTriangular"
1581 /*
1582   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
1583   identify in the local ordering with periodic domain.
1584 */
1585 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[])
1586 {
1587   PetscErrorCode ierr;
1588   PetscInt       i,n;
1589 
1590   PetscFunctionBegin;
1591   ierr = ISLocalToGlobalMappingApply(ltog,1,row,row);CHKERRQ(ierr);
1592   ierr = ISLocalToGlobalMappingApply(ltog,*cnt,col,col);CHKERRQ(ierr);
1593   for (i=0,n=0; i<*cnt; i++) {
1594     if (col[i] >= *row) col[n++] = col[i];
1595   }
1596   *cnt = n;
1597   PetscFunctionReturn(0);
1598 }
1599 
1600 #undef __FUNCT__
1601 #define __FUNCT__ "DMCreateMatrix_DA_2d_MPISBAIJ"
1602 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J)
1603 {
1604   PetscErrorCode         ierr;
1605   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1606   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1607   PetscInt               istart,iend,jstart,jend,ii,jj;
1608   MPI_Comm               comm;
1609   PetscScalar            *values;
1610   DMDABoundaryType       bx,by;
1611   DMDAStencilType        st;
1612   ISLocalToGlobalMapping ltog,ltogb;
1613 
1614   PetscFunctionBegin;
1615   /*
1616      nc - number of components per grid point
1617      col - number of colors needed in one direction for single component problem
1618   */
1619   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1620   col = 2*s + 1;
1621 
1622   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1623   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1624   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1625 
1626   ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1627 
1628   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1629   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1630 
1631   /* determine the matrix preallocation information */
1632   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1633   for (i=xs; i<xs+nx; i++) {
1634     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1635     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1636     for (j=ys; j<ys+ny; j++) {
1637       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1638       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1639       slot = i - gxs + gnx*(j - gys);
1640 
1641       /* Find block columns in block row */
1642       cnt  = 0;
1643       for (ii=istart; ii<iend+1; ii++) {
1644         for (jj=jstart; jj<jend+1; jj++) {
1645           if (st == DMDA_STENCIL_BOX || !ii || !jj) {
1646             cols[cnt++]  = slot + ii + gnx*jj;
1647           }
1648         }
1649       }
1650       ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1651       ierr = MatPreallocateSymmetricSet(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
1652     }
1653   }
1654   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1655   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1656   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1657 
1658   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1659   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1660 
1661   /*
1662     For each node in the grid: we get the neighbors in the local (on processor ordering
1663     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1664     PETSc ordering.
1665   */
1666   if (!da->prealloc_only) {
1667     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1668     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1669     for (i=xs; i<xs+nx; i++) {
1670       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1671       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1672       for (j=ys; j<ys+ny; j++) {
1673         jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1674         jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1675         slot = i - gxs + gnx*(j - gys);
1676 
1677         /* Find block columns in block row */
1678         cnt  = 0;
1679         for (ii=istart; ii<iend+1; ii++) {
1680           for (jj=jstart; jj<jend+1; jj++) {
1681             if (st == DMDA_STENCIL_BOX || !ii || !jj) {
1682               cols[cnt++]  = slot + ii + gnx*jj;
1683             }
1684           }
1685         }
1686         ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1687 	ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1688       }
1689     }
1690     ierr = PetscFree(values);CHKERRQ(ierr);
1691     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1692     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1693   }
1694   ierr = PetscFree(cols);CHKERRQ(ierr);
1695   PetscFunctionReturn(0);
1696 }
1697 
1698 #undef __FUNCT__
1699 #define __FUNCT__ "DMCreateMatrix_DA_3d_MPISBAIJ"
1700 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J)
1701 {
1702   PetscErrorCode         ierr;
1703   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1704   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1705   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1706   MPI_Comm               comm;
1707   PetscScalar            *values;
1708   DMDABoundaryType       bx,by,bz;
1709   DMDAStencilType        st;
1710   ISLocalToGlobalMapping ltog,ltogb;
1711 
1712   PetscFunctionBegin;
1713   /*
1714      nc - number of components per grid point
1715      col - number of colors needed in one direction for single component problem
1716   */
1717   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1718   col = 2*s + 1;
1719 
1720   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1721   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1722   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1723 
1724   /* create the matrix */
1725   ierr = PetscMalloc(col*col*col*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1726 
1727   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1728   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1729 
1730   /* determine the matrix preallocation information */
1731   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1732   for (i=xs; i<xs+nx; i++) {
1733     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1734     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1735     for (j=ys; j<ys+ny; j++) {
1736       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1737       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1738       for (k=zs; k<zs+nz; k++) {
1739         kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1740 	kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1741 
1742 	slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1743 
1744 	/* Find block columns in block row */
1745 	cnt  = 0;
1746         for (ii=istart; ii<iend+1; ii++) {
1747           for (jj=jstart; jj<jend+1; jj++) {
1748             for (kk=kstart; kk<kend+1; kk++) {
1749               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
1750                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1751               }
1752             }
1753           }
1754         }
1755         ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1756         ierr = MatPreallocateSymmetricSet(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
1757       }
1758     }
1759   }
1760   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1761   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1762   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1763 
1764   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1765   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1766 
1767   /*
1768     For each node in the grid: we get the neighbors in the local (on processor ordering
1769     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1770     PETSc ordering.
1771   */
1772   if (!da->prealloc_only) {
1773     ierr = PetscMalloc(col*col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1774     ierr = PetscMemzero(values,col*col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1775     for (i=xs; i<xs+nx; i++) {
1776       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1777       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1778       for (j=ys; j<ys+ny; j++) {
1779         jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1780 	jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1781 	for (k=zs; k<zs+nz; k++) {
1782           kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1783 	  kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1784 
1785 	  slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1786 
1787 	  cnt  = 0;
1788           for (ii=istart; ii<iend+1; ii++) {
1789             for (jj=jstart; jj<jend+1; jj++) {
1790               for (kk=kstart; kk<kend+1; kk++) {
1791                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
1792 		  cols[cnt++]  = slot + ii + gnx*jj + gnx*gny*kk;
1793 		}
1794 	      }
1795 	    }
1796 	  }
1797           ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1798           ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1799 	}
1800       }
1801     }
1802     ierr = PetscFree(values);CHKERRQ(ierr);
1803     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1804     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1805   }
1806   ierr = PetscFree(cols);CHKERRQ(ierr);
1807   PetscFunctionReturn(0);
1808 }
1809 
1810 /* ---------------------------------------------------------------------------------*/
1811 
1812 #undef __FUNCT__
1813 #define __FUNCT__ "DMCreateMatrix_DA_3d_MPIAIJ_Fill"
1814 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J)
1815 {
1816   PetscErrorCode         ierr;
1817   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1818   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,l,p,*dnz,*onz;
1819   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1820   DM_DA                  *dd = (DM_DA*)da->data;
1821   PetscInt               ifill_col,*dfill = dd->dfill,*ofill = dd->ofill;
1822   MPI_Comm               comm;
1823   PetscScalar            *values;
1824   DMDABoundaryType       bx,by,bz;
1825   ISLocalToGlobalMapping ltog,ltogb;
1826   DMDAStencilType        st;
1827 
1828   PetscFunctionBegin;
1829   /*
1830          nc - number of components per grid point
1831          col - number of colors needed in one direction for single component problem
1832 
1833   */
1834   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1835   col    = 2*s + 1;
1836   if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)) SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
1837                  by 2*stencil_width + 1\n");
1838   if (by == DMDA_BOUNDARY_PERIODIC && (n % col)) SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
1839                  by 2*stencil_width + 1\n");
1840   if (bz == DMDA_BOUNDARY_PERIODIC && (p % col)) SETERRQ(((PetscObject)da)->comm,PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
1841                  by 2*stencil_width + 1\n");
1842 
1843   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1844   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1845   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1846 
1847   ierr = PetscMalloc(col*col*col*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1848   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1849   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1850 
1851   /* determine the matrix preallocation information */
1852   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1853 
1854 
1855   for (i=xs; i<xs+nx; i++) {
1856     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1857     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1858     for (j=ys; j<ys+ny; j++) {
1859       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1860       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1861       for (k=zs; k<zs+nz; k++) {
1862         kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1863         kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1864 
1865         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1866 
1867 	for (l=0; l<nc; l++) {
1868 	  cnt  = 0;
1869 	  for (ii=istart; ii<iend+1; ii++) {
1870 	    for (jj=jstart; jj<jend+1; jj++) {
1871 	      for (kk=kstart; kk<kend+1; kk++) {
1872 		if (ii || jj || kk) {
1873 		  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1874 		    for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++)
1875 		      cols[cnt++]  = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1876 		  }
1877 		} else {
1878 		  if (dfill) {
1879 		    for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++)
1880 		      cols[cnt++]  = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1881 		  } else {
1882 		    for (ifill_col=0; ifill_col<nc; ifill_col++)
1883 		      cols[cnt++]  = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1884 		  }
1885 		}
1886 	      }
1887 	    }
1888 	  }
1889 	  row  = l + nc*(slot);
1890 	  ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1891 	}
1892       }
1893     }
1894   }
1895   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1896   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1897   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1898   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1899   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1900 
1901   /*
1902     For each node in the grid: we get the neighbors in the local (on processor ordering
1903     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1904     PETSc ordering.
1905   */
1906   if (!da->prealloc_only) {
1907     ierr = PetscMalloc(col*col*col*nc*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1908     ierr = PetscMemzero(values,col*col*col*nc*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1909     for (i=xs; i<xs+nx; i++) {
1910       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1911       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1912       for (j=ys; j<ys+ny; j++) {
1913 	jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1914 	jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1915 	for (k=zs; k<zs+nz; k++) {
1916 	  kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1917 	  kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1918 
1919 	  slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1920 
1921 	  for (l=0; l<nc; l++) {
1922 	    cnt  = 0;
1923 	    for (ii=istart; ii<iend+1; ii++) {
1924 	      for (jj=jstart; jj<jend+1; jj++) {
1925 		for (kk=kstart; kk<kend+1; kk++) {
1926 		  if (ii || jj || kk) {
1927 		    if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1928 		      for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++)
1929 			cols[cnt++]  = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1930 		    }
1931 		  } else {
1932 		    if (dfill) {
1933 		      for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++)
1934 			cols[cnt++]  = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1935 		    } else {
1936 		      for (ifill_col=0; ifill_col<nc; ifill_col++)
1937 			cols[cnt++]  = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1938 		    }
1939 		  }
1940 		}
1941 	      }
1942 	    }
1943 	    row  = l + nc*(slot);
1944 	    ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1945 	  }
1946 	}
1947       }
1948     }
1949     ierr = PetscFree(values);CHKERRQ(ierr);
1950     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1951     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1952   }
1953   ierr = PetscFree(cols);CHKERRQ(ierr);
1954   PetscFunctionReturn(0);
1955 }
1956