xref: /petsc/src/dm/impls/da/fdda.c (revision 3e1910f1ab6113d8365e15c6b8c907ccce7ce4ea)
1 
2 #include <petsc-private/dmdaimpl.h> /*I      "petscdmda.h"     I*/
3 #include <petscmat.h>
4 #include <petsc-private/matimpl.h>
5 
6 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring*);
7 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring*);
8 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring*);
9 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring*);
10 
11 /*
12    For ghost i that may be negative or greater than the upper bound this
13   maps it into the 0:m-1 range using periodicity
14 */
15 #define SetInRange(i,m) ((i < 0) ? m+i : ((i >= m) ? i-m : i))
16 
17 #undef __FUNCT__
18 #define __FUNCT__ "DMDASetBlockFills_Private"
19 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill,PetscInt w,PetscInt **rfill)
20 {
21   PetscErrorCode ierr;
22   PetscInt       i,j,nz,*fill;
23 
24   PetscFunctionBegin;
25   if (!dfill) PetscFunctionReturn(0);
26 
27   /* count number nonzeros */
28   nz = 0;
29   for (i=0; i<w; i++) {
30     for (j=0; j<w; j++) {
31       if (dfill[w*i+j]) nz++;
32     }
33   }
34   ierr = PetscMalloc((nz + w + 1)*sizeof(PetscInt),&fill);CHKERRQ(ierr);
35   /* construct modified CSR storage of nonzero structure */
36   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
37    so fill[1] - fill[0] gives number of nonzeros in first row etc */
38   nz = w + 1;
39   for (i=0; i<w; i++) {
40     fill[i] = nz;
41     for (j=0; j<w; j++) {
42       if (dfill[w*i+j]) {
43         fill[nz] = j;
44         nz++;
45       }
46     }
47   }
48   fill[w] = nz;
49 
50   *rfill = fill;
51   PetscFunctionReturn(0);
52 }
53 
54 #undef __FUNCT__
55 #define __FUNCT__ "DMDASetBlockFills"
56 /*@
57     DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
58     of the matrix returned by DMCreateMatrix().
59 
60     Logically Collective on DMDA
61 
62     Input Parameter:
63 +   da - the distributed array
64 .   dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
65 -   ofill - the fill pattern in the off-diagonal blocks
66 
67 
68     Level: developer
69 
70     Notes: This only makes sense when you are doing multicomponent problems but using the
71        MPIAIJ matrix format
72 
73            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
74        representing coupling and 0 entries for missing coupling. For example
75 $             dfill[9] = {1, 0, 0,
76 $                         1, 1, 0,
77 $                         0, 1, 1}
78        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
79        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
80        diagonal block).
81 
82      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
83      can be represented in the dfill, ofill format
84 
85    Contributed by Glenn Hammond
86 
87 .seealso DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()
88 
89 @*/
90 PetscErrorCode  DMDASetBlockFills(DM da,const PetscInt *dfill,const PetscInt *ofill)
91 {
92   DM_DA          *dd = (DM_DA*)da->data;
93   PetscErrorCode ierr;
94   PetscInt       i,k,cnt = 1;
95 
96   PetscFunctionBegin;
97   ierr = DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);CHKERRQ(ierr);
98   ierr = DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);CHKERRQ(ierr);
99 
100   /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of
101    columns to the left with any nonzeros in them plus 1 */
102   ierr = PetscMalloc(dd->w*sizeof(PetscBool),&dd->ofillcols);CHKERRQ(ierr);
103   ierr = PetscMemzero(dd->ofillcols,dd->w*sizeof(PetscBool));CHKERRQ(ierr);
104   for (i=0; i<dd->w; i++) {
105     for (k=dd->ofill[i]; k<dd->ofill[i+1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
106   }
107   for (i=0; i<dd->w; i++) {
108     if (dd->ofillcols[i]) {
109       dd->ofillcols[i] = cnt++;
110     }
111   }
112   PetscFunctionReturn(0);
113 }
114 
115 
116 #undef __FUNCT__
117 #define __FUNCT__ "DMCreateColoring_DA"
118 PetscErrorCode  DMCreateColoring_DA(DM da,ISColoringType ctype,MatType mtype,ISColoring *coloring)
119 {
120   PetscErrorCode   ierr;
121   PetscInt         dim,m,n,p,nc;
122   DMDABoundaryType bx,by,bz;
123   MPI_Comm         comm;
124   PetscMPIInt      size;
125   PetscBool        isBAIJ;
126   DM_DA            *dd = (DM_DA*)da->data;
127 
128   PetscFunctionBegin;
129   /*
130                                   m
131           ------------------------------------------------------
132          |                                                     |
133          |                                                     |
134          |               ----------------------                |
135          |               |                    |                |
136       n  |           yn  |                    |                |
137          |               |                    |                |
138          |               .---------------------                |
139          |             (xs,ys)     xn                          |
140          |            .                                        |
141          |         (gxs,gys)                                   |
142          |                                                     |
143           -----------------------------------------------------
144   */
145 
146   /*
147          nc - number of components per grid point
148          col - number of colors needed in one direction for single component problem
149 
150   */
151   ierr = DMDAGetInfo(da,&dim,0,0,0,&m,&n,&p,&nc,0,&bx,&by,&bz,0);CHKERRQ(ierr);
152 
153   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
154   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
155   if (ctype == IS_COLORING_GHOSTED) {
156     if (size == 1) {
157       ctype = IS_COLORING_GLOBAL;
158     } else if (dim > 1) {
159       if ((m==1 && bx == DMDA_BOUNDARY_PERIODIC) || (n==1 && by == DMDA_BOUNDARY_PERIODIC) || (p==1 && bz == DMDA_BOUNDARY_PERIODIC)) {
160         SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"IS_COLORING_GHOSTED cannot be used for periodic boundary condition having both ends of the domain  on the same process");
161       }
162     }
163   }
164 
165   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
166      matrices is for the blocks, not the individual matrix elements  */
167   ierr = PetscStrcmp(mtype,MATBAIJ,&isBAIJ);CHKERRQ(ierr);
168   if (!isBAIJ) {ierr = PetscStrcmp(mtype,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);}
169   if (!isBAIJ) {ierr = PetscStrcmp(mtype,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);}
170   if (isBAIJ) {
171     dd->w  = 1;
172     dd->xs = dd->xs/nc;
173     dd->xe = dd->xe/nc;
174     dd->Xs = dd->Xs/nc;
175     dd->Xe = dd->Xe/nc;
176   }
177 
178   /*
179      We do not provide a getcoloring function in the DMDA operations because
180    the basic DMDA does not know about matrices. We think of DMDA as being more
181    more low-level then matrices.
182   */
183   if (dim == 1) {
184     ierr = DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
185   } else if (dim == 2) {
186     ierr =  DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
187   } else if (dim == 3) {
188     ierr =  DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
189   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
190   if (isBAIJ) {
191     dd->w  = nc;
192     dd->xs = dd->xs*nc;
193     dd->xe = dd->xe*nc;
194     dd->Xs = dd->Xs*nc;
195     dd->Xe = dd->Xe*nc;
196   }
197   PetscFunctionReturn(0);
198 }
199 
200 /* ---------------------------------------------------------------------------------*/
201 
202 #undef __FUNCT__
203 #define __FUNCT__ "DMCreateColoring_DA_2d_MPIAIJ"
204 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
205 {
206   PetscErrorCode   ierr;
207   PetscInt         xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
208   PetscInt         ncolors;
209   MPI_Comm         comm;
210   DMDABoundaryType bx,by;
211   DMDAStencilType  st;
212   ISColoringValue  *colors;
213   DM_DA            *dd = (DM_DA*)da->data;
214 
215   PetscFunctionBegin;
216   /*
217          nc - number of components per grid point
218          col - number of colors needed in one direction for single component problem
219 
220   */
221   ierr = DMDAGetInfo(da,&dim,&m,&n,0,&M,&N,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
222   col  = 2*s + 1;
223   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
224   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
225   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
226 
227   /* special case as taught to us by Paul Hovland */
228   if (st == DMDA_STENCIL_STAR && s == 1) {
229     ierr = DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
230   } else {
231 
232     if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X (%d) is divisible\n\
233                                                             by 2*stencil_width + 1 (%d)\n", m, col);
234     if (by == DMDA_BOUNDARY_PERIODIC && (n % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y (%d) is divisible\n\
235                                                             by 2*stencil_width + 1 (%d)\n", n, col);
236     if (ctype == IS_COLORING_GLOBAL) {
237       if (!dd->localcoloring) {
238         ierr = PetscMalloc(nc*nx*ny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
239         ii   = 0;
240         for (j=ys; j<ys+ny; j++) {
241           for (i=xs; i<xs+nx; i++) {
242             for (k=0; k<nc; k++) {
243               colors[ii++] = k + nc*((i % col) + col*(j % col));
244             }
245           }
246         }
247         ncolors = nc + nc*(col-1 + col*(col-1));
248         ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,&dd->localcoloring);CHKERRQ(ierr);
249       }
250       *coloring = dd->localcoloring;
251     } else if (ctype == IS_COLORING_GHOSTED) {
252       if (!dd->ghostedcoloring) {
253         ierr = PetscMalloc(nc*gnx*gny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
254         ii   = 0;
255         for (j=gys; j<gys+gny; j++) {
256           for (i=gxs; i<gxs+gnx; i++) {
257             for (k=0; k<nc; k++) {
258               /* the complicated stuff is to handle periodic boundaries */
259               colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
260             }
261           }
262         }
263         ncolors = nc + nc*(col - 1 + col*(col-1));
264         ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
265         /* PetscIntView(ncolors,(PetscInt*)colors,0); */
266 
267         ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
268       }
269       *coloring = dd->ghostedcoloring;
270     } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
271   }
272   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
273   PetscFunctionReturn(0);
274 }
275 
276 /* ---------------------------------------------------------------------------------*/
277 
278 #undef __FUNCT__
279 #define __FUNCT__ "DMCreateColoring_DA_3d_MPIAIJ"
280 PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
281 {
282   PetscErrorCode   ierr;
283   PetscInt         xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
284   PetscInt         ncolors;
285   MPI_Comm         comm;
286   DMDABoundaryType bx,by,bz;
287   DMDAStencilType  st;
288   ISColoringValue  *colors;
289   DM_DA            *dd = (DM_DA*)da->data;
290 
291   PetscFunctionBegin;
292   /*
293          nc - number of components per grid point
294          col - number of colors needed in one direction for single component problem
295 
296   */
297   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
298   col  = 2*s + 1;
299   if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
300                                                          by 2*stencil_width + 1\n");
301   if (by == DMDA_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
302                                                          by 2*stencil_width + 1\n");
303   if (bz == DMDA_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
304                                                          by 2*stencil_width + 1\n");
305 
306   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
307   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
308   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
309 
310   /* create the coloring */
311   if (ctype == IS_COLORING_GLOBAL) {
312     if (!dd->localcoloring) {
313       ierr = PetscMalloc(nc*nx*ny*nz*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
314       ii   = 0;
315       for (k=zs; k<zs+nz; k++) {
316         for (j=ys; j<ys+ny; j++) {
317           for (i=xs; i<xs+nx; i++) {
318             for (l=0; l<nc; l++) {
319               colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
320             }
321           }
322         }
323       }
324       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
325       ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,&dd->localcoloring);CHKERRQ(ierr);
326     }
327     *coloring = dd->localcoloring;
328   } else if (ctype == IS_COLORING_GHOSTED) {
329     if (!dd->ghostedcoloring) {
330       ierr = PetscMalloc(nc*gnx*gny*gnz*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
331       ii   = 0;
332       for (k=gzs; k<gzs+gnz; k++) {
333         for (j=gys; j<gys+gny; j++) {
334           for (i=gxs; i<gxs+gnx; i++) {
335             for (l=0; l<nc; l++) {
336               /* the complicated stuff is to handle periodic boundaries */
337               colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
338             }
339           }
340         }
341       }
342       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
343       ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
344       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
345     }
346     *coloring = dd->ghostedcoloring;
347   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
348   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
349   PetscFunctionReturn(0);
350 }
351 
352 /* ---------------------------------------------------------------------------------*/
353 
354 #undef __FUNCT__
355 #define __FUNCT__ "DMCreateColoring_DA_1d_MPIAIJ"
356 PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
357 {
358   PetscErrorCode   ierr;
359   PetscInt         xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
360   PetscInt         ncolors;
361   MPI_Comm         comm;
362   DMDABoundaryType bx;
363   ISColoringValue  *colors;
364   DM_DA            *dd = (DM_DA*)da->data;
365 
366   PetscFunctionBegin;
367   /*
368          nc - number of components per grid point
369          col - number of colors needed in one direction for single component problem
370 
371   */
372   ierr = DMDAGetInfo(da,&dim,&m,0,0,&M,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
373   col  = 2*s + 1;
374 
375   if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points %d is divisible\n\
376                                                           by 2*stencil_width + 1 %d\n",(int)m,(int)col);
377 
378   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
379   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
380   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
381 
382   /* create the coloring */
383   if (ctype == IS_COLORING_GLOBAL) {
384     if (!dd->localcoloring) {
385       ierr = PetscMalloc(nc*nx*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
386       if (dd->ofillcols) {
387         PetscInt tc = 0;
388         for (i=0; i<nc; i++) tc += (PetscInt) (dd->ofillcols[i] > 0);
389         i1 = 0;
390         for (i=xs; i<xs+nx; i++) {
391           for (l=0; l<nc; l++) {
392             if (dd->ofillcols[l] && (i % col)) {
393               colors[i1++] =  nc - 1 + tc*((i % col) - 1) + dd->ofillcols[l];
394             } else {
395               colors[i1++] = l;
396             }
397           }
398         }
399         ncolors = nc + 2*s*tc;
400       } else {
401         i1 = 0;
402         for (i=xs; i<xs+nx; i++) {
403           for (l=0; l<nc; l++) {
404             colors[i1++] = l + nc*(i % col);
405           }
406         }
407         ncolors = nc + nc*(col-1);
408       }
409       ierr = ISColoringCreate(comm,ncolors,nc*nx,colors,&dd->localcoloring);CHKERRQ(ierr);
410     }
411     *coloring = dd->localcoloring;
412   } else if (ctype == IS_COLORING_GHOSTED) {
413     if (!dd->ghostedcoloring) {
414       ierr = PetscMalloc(nc*gnx*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
415       i1   = 0;
416       for (i=gxs; i<gxs+gnx; i++) {
417         for (l=0; l<nc; l++) {
418           /* the complicated stuff is to handle periodic boundaries */
419           colors[i1++] = l + nc*(SetInRange(i,m) % col);
420         }
421       }
422       ncolors = nc + nc*(col-1);
423       ierr    = ISColoringCreate(comm,ncolors,nc*gnx,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
424       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
425     }
426     *coloring = dd->ghostedcoloring;
427   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
428   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
429   PetscFunctionReturn(0);
430 }
431 
432 #undef __FUNCT__
433 #define __FUNCT__ "DMCreateColoring_DA_2d_5pt_MPIAIJ"
434 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
435 {
436   PetscErrorCode   ierr;
437   PetscInt         xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
438   PetscInt         ncolors;
439   MPI_Comm         comm;
440   DMDABoundaryType bx,by;
441   ISColoringValue  *colors;
442   DM_DA            *dd = (DM_DA*)da->data;
443 
444   PetscFunctionBegin;
445   /*
446          nc - number of components per grid point
447          col - number of colors needed in one direction for single component problem
448 
449   */
450   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,0);CHKERRQ(ierr);
451   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
452   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
453   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
454 
455   if (bx == DMDA_BOUNDARY_PERIODIC && (m % 5)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible by 5\n");
456   if (by == DMDA_BOUNDARY_PERIODIC && (n % 5)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible by 5\n");
457 
458   /* create the coloring */
459   if (ctype == IS_COLORING_GLOBAL) {
460     if (!dd->localcoloring) {
461       ierr = PetscMalloc(nc*nx*ny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
462       ii   = 0;
463       for (j=ys; j<ys+ny; j++) {
464         for (i=xs; i<xs+nx; i++) {
465           for (k=0; k<nc; k++) {
466             colors[ii++] = k + nc*((3*j+i) % 5);
467           }
468         }
469       }
470       ncolors = 5*nc;
471       ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,&dd->localcoloring);CHKERRQ(ierr);
472     }
473     *coloring = dd->localcoloring;
474   } else if (ctype == IS_COLORING_GHOSTED) {
475     if (!dd->ghostedcoloring) {
476       ierr = PetscMalloc(nc*gnx*gny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
477       ii = 0;
478       for (j=gys; j<gys+gny; j++) {
479         for (i=gxs; i<gxs+gnx; i++) {
480           for (k=0; k<nc; k++) {
481             colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
482           }
483         }
484       }
485       ncolors = 5*nc;
486       ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
487       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
488     }
489     *coloring = dd->ghostedcoloring;
490   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
491   PetscFunctionReturn(0);
492 }
493 
494 /* =========================================================================== */
495 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat);
496 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM,Mat);
497 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat);
498 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat);
499 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat);
500 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat);
501 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat);
502 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat);
503 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat);
504 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat);
505 
506 #undef __FUNCT__
507 #define __FUNCT__ "MatSetupDM"
508 /*@C
509    MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix
510 
511    Logically Collective on Mat
512 
513    Input Parameters:
514 +  mat - the matrix
515 -  da - the da
516 
517    Level: intermediate
518 
519 @*/
520 PetscErrorCode MatSetupDM(Mat mat,DM da)
521 {
522   PetscErrorCode ierr;
523 
524   PetscFunctionBegin;
525   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
526   PetscValidHeaderSpecific(da,DM_CLASSID,1);
527   ierr = PetscTryMethod(mat,"MatSetupDM_C",(Mat,DM),(mat,da));CHKERRQ(ierr);
528   PetscFunctionReturn(0);
529 }
530 
531 #undef __FUNCT__
532 #define __FUNCT__ "MatView_MPI_DA"
533 PetscErrorCode  MatView_MPI_DA(Mat A,PetscViewer viewer)
534 {
535   DM                da;
536   PetscErrorCode    ierr;
537   const char        *prefix;
538   Mat               Anatural;
539   AO                ao;
540   PetscInt          rstart,rend,*petsc,i;
541   IS                is;
542   MPI_Comm          comm;
543   PetscViewerFormat format;
544 
545   PetscFunctionBegin;
546   /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
547   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
548   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0);
549 
550   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
551   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
552   if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
553 
554   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
555   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
556   ierr = PetscMalloc((rend-rstart)*sizeof(PetscInt),&petsc);CHKERRQ(ierr);
557   for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
558   ierr = AOApplicationToPetsc(ao,rend-rstart,petsc);CHKERRQ(ierr);
559   ierr = ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
560 
561   /* call viewer on natural ordering */
562   ierr = MatGetSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);CHKERRQ(ierr);
563   ierr = ISDestroy(&is);CHKERRQ(ierr);
564   ierr = PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);CHKERRQ(ierr);
565   ierr = PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);CHKERRQ(ierr);
566   ierr = PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);CHKERRQ(ierr);
567   ierr = MatView(Anatural,viewer);CHKERRQ(ierr);
568   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
569   PetscFunctionReturn(0);
570 }
571 
572 #undef __FUNCT__
573 #define __FUNCT__ "MatLoad_MPI_DA"
574 PetscErrorCode  MatLoad_MPI_DA(Mat A,PetscViewer viewer)
575 {
576   DM             da;
577   PetscErrorCode ierr;
578   Mat            Anatural,Aapp;
579   AO             ao;
580   PetscInt       rstart,rend,*app,i;
581   IS             is;
582   MPI_Comm       comm;
583 
584   PetscFunctionBegin;
585   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
586   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
587   if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
588 
589   /* Load the matrix in natural ordering */
590   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Anatural);CHKERRQ(ierr);
591   ierr = MatSetType(Anatural,((PetscObject)A)->type_name);CHKERRQ(ierr);
592   ierr = MatSetSizes(Anatural,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
593   ierr = MatLoad(Anatural,viewer);CHKERRQ(ierr);
594 
595   /* Map natural ordering to application ordering and create IS */
596   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
597   ierr = MatGetOwnershipRange(Anatural,&rstart,&rend);CHKERRQ(ierr);
598   ierr = PetscMalloc((rend-rstart)*sizeof(PetscInt),&app);CHKERRQ(ierr);
599   for (i=rstart; i<rend; i++) app[i-rstart] = i;
600   ierr = AOPetscToApplication(ao,rend-rstart,app);CHKERRQ(ierr);
601   ierr = ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
602 
603   /* Do permutation and replace header */
604   ierr = MatGetSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);CHKERRQ(ierr);
605   ierr = MatHeaderReplace(A,Aapp);CHKERRQ(ierr);
606   ierr = ISDestroy(&is);CHKERRQ(ierr);
607   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
608   PetscFunctionReturn(0);
609 }
610 
611 #undef __FUNCT__
612 #define __FUNCT__ "DMCreateMatrix_DA"
613 PetscErrorCode DMCreateMatrix_DA(DM da, MatType mtype,Mat *J)
614 {
615   PetscErrorCode ierr;
616   PetscInt       dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
617   Mat            A;
618   MPI_Comm       comm;
619   MatType        Atype;
620   PetscSection   section, sectionGlobal;
621   void           (*aij)(void)=NULL,(*baij)(void)=NULL,(*sbaij)(void)=NULL;
622   MatType        ttype[256];
623   PetscBool      flg;
624   PetscMPIInt    size;
625   DM_DA          *dd = (DM_DA*)da->data;
626 
627   PetscFunctionBegin;
628 #if !defined(PETSC_USE_DYNAMIC_LIBRARIES)
629   ierr = MatInitializePackage();CHKERRQ(ierr);
630 #endif
631   if (!mtype) mtype = MATAIJ;
632   ierr = PetscStrcpy((char*)ttype,mtype);CHKERRQ(ierr);
633   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)da),((PetscObject)da)->prefix,"DMDA options","Mat");CHKERRQ(ierr);
634   ierr = PetscOptionsList("-dm_mat_type","Matrix type","MatSetType",MatList,mtype,(char*)ttype,256,&flg);CHKERRQ(ierr);
635   ierr = PetscOptionsEnd();
636 
637   ierr = DMGetDefaultSection(da, &section);CHKERRQ(ierr);
638   if (section) {
639     PetscInt  bs = -1;
640     PetscInt  localSize;
641     PetscBool isShell, isBlock, isSeqBlock, isMPIBlock, isSymBlock, isSymSeqBlock, isSymMPIBlock, isSymmetric;
642 
643     ierr = DMGetDefaultGlobalSection(da, &sectionGlobal);CHKERRQ(ierr);
644     ierr = PetscSectionGetConstrainedStorageSize(sectionGlobal, &localSize);CHKERRQ(ierr);
645     ierr = MatCreate(PetscObjectComm((PetscObject)da), J);CHKERRQ(ierr);
646     ierr = MatSetSizes(*J, localSize, localSize, PETSC_DETERMINE, PETSC_DETERMINE);CHKERRQ(ierr);
647     ierr = MatSetType(*J, mtype);CHKERRQ(ierr);
648     ierr = MatSetFromOptions(*J);CHKERRQ(ierr);
649     ierr = PetscStrcmp(mtype, MATSHELL, &isShell);CHKERRQ(ierr);
650     ierr = PetscStrcmp(mtype, MATBAIJ, &isBlock);CHKERRQ(ierr);
651     ierr = PetscStrcmp(mtype, MATSEQBAIJ, &isSeqBlock);CHKERRQ(ierr);
652     ierr = PetscStrcmp(mtype, MATMPIBAIJ, &isMPIBlock);CHKERRQ(ierr);
653     ierr = PetscStrcmp(mtype, MATSBAIJ, &isSymBlock);CHKERRQ(ierr);
654     ierr = PetscStrcmp(mtype, MATSEQSBAIJ, &isSymSeqBlock);CHKERRQ(ierr);
655     ierr = PetscStrcmp(mtype, MATMPISBAIJ, &isSymMPIBlock);CHKERRQ(ierr);
656     /* Check for symmetric storage */
657     isSymmetric = (PetscBool) (isSymBlock || isSymSeqBlock || isSymMPIBlock);
658     if (isSymmetric) {
659       ierr = MatSetOption(*J, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE);CHKERRQ(ierr);
660     }
661     if (!isShell) {
662       PetscInt *dnz, *onz, *dnzu, *onzu, bsLocal;
663 
664       if (bs < 0) {
665         if (isBlock || isSeqBlock || isMPIBlock || isSymBlock || isSymSeqBlock || isSymMPIBlock) {
666           PetscInt pStart, pEnd, p, dof;
667 
668           ierr = PetscSectionGetChart(sectionGlobal, &pStart, &pEnd);CHKERRQ(ierr);
669           for (p = pStart; p < pEnd; ++p) {
670             ierr = PetscSectionGetDof(sectionGlobal, p, &dof);CHKERRQ(ierr);
671             if (dof) {
672               bs = dof;
673               break;
674             }
675           }
676         } else {
677           bs = 1;
678         }
679         /* Must have same blocksize on all procs (some might have no points) */
680         bsLocal = bs;
681         ierr    = MPI_Allreduce(&bsLocal, &bs, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)da));CHKERRQ(ierr);
682       }
683       ierr = PetscMalloc4(localSize/bs, PetscInt, &dnz, localSize/bs, PetscInt, &onz, localSize/bs, PetscInt, &dnzu, localSize/bs, PetscInt, &onzu);CHKERRQ(ierr);
684       ierr = PetscMemzero(dnz,  localSize/bs * sizeof(PetscInt));CHKERRQ(ierr);
685       ierr = PetscMemzero(onz,  localSize/bs * sizeof(PetscInt));CHKERRQ(ierr);
686       ierr = PetscMemzero(dnzu, localSize/bs * sizeof(PetscInt));CHKERRQ(ierr);
687       ierr = PetscMemzero(onzu, localSize/bs * sizeof(PetscInt));CHKERRQ(ierr);
688       /* ierr = DMPlexPreallocateOperator(dm, bs, section, sectionGlobal, dnz, onz, dnzu, onzu, *J, fillMatrix);CHKERRQ(ierr); */
689       ierr = PetscFree4(dnz, onz, dnzu, onzu);CHKERRQ(ierr);
690     }
691   }
692   /*
693                                   m
694           ------------------------------------------------------
695          |                                                     |
696          |                                                     |
697          |               ----------------------                |
698          |               |                    |                |
699       n  |           ny  |                    |                |
700          |               |                    |                |
701          |               .---------------------                |
702          |             (xs,ys)     nx                          |
703          |            .                                        |
704          |         (gxs,gys)                                   |
705          |                                                     |
706           -----------------------------------------------------
707   */
708 
709   /*
710          nc - number of components per grid point
711          col - number of colors needed in one direction for single component problem
712 
713   */
714   M   = dd->M;
715   N   = dd->N;
716   P   = dd->P;
717   dim = dd->dim;
718   dof = dd->w;
719   /* ierr = DMDAGetInfo(da,&dim,&M,&N,&P,0,0,0,&dof,0,0,0,0,0);CHKERRQ(ierr); */
720   ierr = DMDAGetCorners(da,0,0,0,&nx,&ny,&nz);CHKERRQ(ierr);
721   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
722   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
723   ierr = MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);CHKERRQ(ierr);
724   ierr = MatSetType(A,(MatType)ttype);CHKERRQ(ierr);
725   ierr = MatSetDM(A,da);CHKERRQ(ierr);
726   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
727   ierr = MatGetType(A,&Atype);CHKERRQ(ierr);
728   /*
729      We do not provide a getmatrix function in the DMDA operations because
730    the basic DMDA does not know about matrices. We think of DMDA as being more
731    more low-level than matrices. This is kind of cheating but, cause sometimes
732    we think of DMDA has higher level than matrices.
733 
734      We could switch based on Atype (or mtype), but we do not since the
735    specialized setting routines depend only the particular preallocation
736    details of the matrix, not the type itself.
737   */
738   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
739   if (!aij) {
740     ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
741   }
742   if (!aij) {
743     ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
744     if (!baij) {
745       ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
746     }
747     if (!baij) {
748       ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
749       if (!sbaij) {
750         ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
751       }
752     }
753   }
754   if (aij) {
755     if (dim == 1) {
756       if (dd->ofill) {
757         ierr = DMCreateMatrix_DA_1d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
758       } else {
759         ierr = DMCreateMatrix_DA_1d_MPIAIJ(da,A);CHKERRQ(ierr);
760       }
761     } else if (dim == 2) {
762       if (dd->ofill) {
763         ierr = DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
764       } else {
765         ierr = DMCreateMatrix_DA_2d_MPIAIJ(da,A);CHKERRQ(ierr);
766       }
767     } else if (dim == 3) {
768       if (dd->ofill) {
769         ierr = DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
770       } else {
771         ierr = DMCreateMatrix_DA_3d_MPIAIJ(da,A);CHKERRQ(ierr);
772       }
773     }
774   } else if (baij) {
775     if (dim == 2) {
776       ierr = DMCreateMatrix_DA_2d_MPIBAIJ(da,A);CHKERRQ(ierr);
777     } else if (dim == 3) {
778       ierr = DMCreateMatrix_DA_3d_MPIBAIJ(da,A);CHKERRQ(ierr);
779     } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
780   } else if (sbaij) {
781     if (dim == 2) {
782       ierr = DMCreateMatrix_DA_2d_MPISBAIJ(da,A);CHKERRQ(ierr);
783     } else if (dim == 3) {
784       ierr = DMCreateMatrix_DA_3d_MPISBAIJ(da,A);CHKERRQ(ierr);
785     } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
786   } else {
787     ISLocalToGlobalMapping ltog,ltogb;
788     ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
789     ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
790     ierr = MatSetUp(A);CHKERRQ(ierr);
791     ierr = MatSetLocalToGlobalMapping(A,ltog,ltog);CHKERRQ(ierr);
792     ierr = MatSetLocalToGlobalMappingBlock(A,ltogb,ltogb);CHKERRQ(ierr);
793   }
794   ierr = DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);CHKERRQ(ierr);
795   ierr = MatSetStencil(A,dim,dims,starts,dof);CHKERRQ(ierr);
796   ierr = MatSetDM(A,da);CHKERRQ(ierr);
797   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
798   if (size > 1) {
799     /* change viewer to display matrix in natural ordering */
800     ierr = MatShellSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA);CHKERRQ(ierr);
801     ierr = MatShellSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA);CHKERRQ(ierr);
802   }
803   *J = A;
804   PetscFunctionReturn(0);
805 }
806 
807 /* ---------------------------------------------------------------------------------*/
808 #undef __FUNCT__
809 #define __FUNCT__ "DMCreateMatrix_DA_2d_MPIAIJ"
810 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J)
811 {
812   PetscErrorCode         ierr;
813   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p;
814   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
815   MPI_Comm               comm;
816   PetscScalar            *values;
817   DMDABoundaryType       bx,by;
818   ISLocalToGlobalMapping ltog,ltogb;
819   DMDAStencilType        st;
820 
821   PetscFunctionBegin;
822   /*
823          nc - number of components per grid point
824          col - number of colors needed in one direction for single component problem
825 
826   */
827   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
828   col  = 2*s + 1;
829   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
830   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
831   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
832 
833   ierr = PetscMalloc2(nc,PetscInt,&rows,col*col*nc*nc,PetscInt,&cols);CHKERRQ(ierr);
834   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
835   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
836 
837   /* determine the matrix preallocation information */
838   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
839   for (i=xs; i<xs+nx; i++) {
840 
841     pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
842     pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
843 
844     for (j=ys; j<ys+ny; j++) {
845       slot = i - gxs + gnx*(j - gys);
846 
847       lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
848       lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
849 
850       cnt = 0;
851       for (k=0; k<nc; k++) {
852         for (l=lstart; l<lend+1; l++) {
853           for (p=pstart; p<pend+1; p++) {
854             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
855               cols[cnt++] = k + nc*(slot + gnx*l + p);
856             }
857           }
858         }
859         rows[k] = k + nc*(slot);
860       }
861       ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
862     }
863   }
864   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
865   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
866   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
867   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
868 
869   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
870   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
871 
872   /*
873     For each node in the grid: we get the neighbors in the local (on processor ordering
874     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
875     PETSc ordering.
876   */
877   if (!da->prealloc_only) {
878     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
879     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
880     for (i=xs; i<xs+nx; i++) {
881 
882       pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
883       pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
884 
885       for (j=ys; j<ys+ny; j++) {
886         slot = i - gxs + gnx*(j - gys);
887 
888         lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
889         lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
890 
891         cnt = 0;
892         for (k=0; k<nc; k++) {
893           for (l=lstart; l<lend+1; l++) {
894             for (p=pstart; p<pend+1; p++) {
895               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
896                 cols[cnt++] = k + nc*(slot + gnx*l + p);
897               }
898             }
899           }
900           rows[k] = k + nc*(slot);
901         }
902         ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
903       }
904     }
905     ierr = PetscFree(values);CHKERRQ(ierr);
906     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
907     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
908   }
909   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
910   PetscFunctionReturn(0);
911 }
912 
913 #undef __FUNCT__
914 #define __FUNCT__ "DMCreateMatrix_DA_2d_MPIAIJ_Fill"
915 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)
916 {
917   PetscErrorCode         ierr;
918   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
919   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,l,p;
920   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
921   DM_DA                  *dd = (DM_DA*)da->data;
922   PetscInt               ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
923   MPI_Comm               comm;
924   PetscScalar            *values;
925   DMDABoundaryType       bx,by;
926   ISLocalToGlobalMapping ltog,ltogb;
927   DMDAStencilType        st;
928 
929   PetscFunctionBegin;
930   /*
931          nc - number of components per grid point
932          col - number of colors needed in one direction for single component problem
933 
934   */
935   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
936   col  = 2*s + 1;
937   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
938   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
939   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
940 
941   ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
942   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
943   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
944 
945   /* determine the matrix preallocation information */
946   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
947   for (i=xs; i<xs+nx; i++) {
948 
949     pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
950     pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
951 
952     for (j=ys; j<ys+ny; j++) {
953       slot = i - gxs + gnx*(j - gys);
954 
955       lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
956       lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
957 
958       for (k=0; k<nc; k++) {
959         cnt = 0;
960         for (l=lstart; l<lend+1; l++) {
961           for (p=pstart; p<pend+1; p++) {
962             if (l || p) {
963               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
964                 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
965               }
966             } else {
967               if (dfill) {
968                 for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
969               } else {
970                 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
971               }
972             }
973           }
974         }
975         row  = k + nc*(slot);
976         ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
977       }
978     }
979   }
980   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
981   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
982   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
983   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
984   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
985 
986   /*
987     For each node in the grid: we get the neighbors in the local (on processor ordering
988     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
989     PETSc ordering.
990   */
991   if (!da->prealloc_only) {
992     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
993     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
994     for (i=xs; i<xs+nx; i++) {
995 
996       pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
997       pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
998 
999       for (j=ys; j<ys+ny; j++) {
1000         slot = i - gxs + gnx*(j - gys);
1001 
1002         lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1003         lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1004 
1005         for (k=0; k<nc; k++) {
1006           cnt = 0;
1007           for (l=lstart; l<lend+1; l++) {
1008             for (p=pstart; p<pend+1; p++) {
1009               if (l || p) {
1010                 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
1011                   for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
1012                 }
1013               } else {
1014                 if (dfill) {
1015                   for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1016                 } else {
1017                   for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1018                 }
1019               }
1020             }
1021           }
1022           row  = k + nc*(slot);
1023           ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1024         }
1025       }
1026     }
1027     ierr = PetscFree(values);CHKERRQ(ierr);
1028     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1029     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1030   }
1031   ierr = PetscFree(cols);CHKERRQ(ierr);
1032   PetscFunctionReturn(0);
1033 }
1034 
1035 /* ---------------------------------------------------------------------------------*/
1036 
1037 #undef __FUNCT__
1038 #define __FUNCT__ "DMCreateMatrix_DA_3d_MPIAIJ"
1039 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J)
1040 {
1041   PetscErrorCode         ierr;
1042   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1043   PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
1044   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1045   MPI_Comm               comm;
1046   PetscScalar            *values;
1047   DMDABoundaryType       bx,by,bz;
1048   ISLocalToGlobalMapping ltog,ltogb;
1049   DMDAStencilType        st;
1050 
1051   PetscFunctionBegin;
1052   /*
1053          nc - number of components per grid point
1054          col - number of colors needed in one direction for single component problem
1055 
1056   */
1057   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1058   col  = 2*s + 1;
1059 
1060   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1061   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1062   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1063 
1064   ierr = PetscMalloc2(nc,PetscInt,&rows,col*col*col*nc*nc,PetscInt,&cols);CHKERRQ(ierr);
1065   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1066   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1067 
1068   /* determine the matrix preallocation information */
1069   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1070   for (i=xs; i<xs+nx; i++) {
1071     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1072     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1073     for (j=ys; j<ys+ny; j++) {
1074       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1075       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1076       for (k=zs; k<zs+nz; k++) {
1077         kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1078         kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1079 
1080         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1081 
1082         cnt = 0;
1083         for (l=0; l<nc; l++) {
1084           for (ii=istart; ii<iend+1; ii++) {
1085             for (jj=jstart; jj<jend+1; jj++) {
1086               for (kk=kstart; kk<kend+1; kk++) {
1087                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1088                   cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1089                 }
1090               }
1091             }
1092           }
1093           rows[l] = l + nc*(slot);
1094         }
1095         ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1096       }
1097     }
1098   }
1099   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1100   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1101   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1102   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1103   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1104   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1105 
1106   /*
1107     For each node in the grid: we get the neighbors in the local (on processor ordering
1108     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1109     PETSc ordering.
1110   */
1111   if (!da->prealloc_only) {
1112     ierr = PetscMalloc(col*col*col*nc*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1113     ierr = PetscMemzero(values,col*col*col*nc*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1114     for (i=xs; i<xs+nx; i++) {
1115       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1116       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1117       for (j=ys; j<ys+ny; j++) {
1118         jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1119         jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1120         for (k=zs; k<zs+nz; k++) {
1121           kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1122           kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1123 
1124           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1125 
1126           cnt = 0;
1127           for (l=0; l<nc; l++) {
1128             for (ii=istart; ii<iend+1; ii++) {
1129               for (jj=jstart; jj<jend+1; jj++) {
1130                 for (kk=kstart; kk<kend+1; kk++) {
1131                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1132                     cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1133                   }
1134                 }
1135               }
1136             }
1137             rows[l] = l + nc*(slot);
1138           }
1139           ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1140         }
1141       }
1142     }
1143     ierr = PetscFree(values);CHKERRQ(ierr);
1144     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1145     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1146   }
1147   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1148   PetscFunctionReturn(0);
1149 }
1150 
1151 /* ---------------------------------------------------------------------------------*/
1152 
1153 #undef __FUNCT__
1154 #define __FUNCT__ "DMCreateMatrix_DA_1d_MPIAIJ_Fill"
1155 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J)
1156 {
1157   PetscErrorCode         ierr;
1158   DM_DA                  *dd = (DM_DA*)da->data;
1159   PetscInt               xs,nx,i,j,gxs,gnx,row,k,l;
1160   PetscInt               m,dim,s,*cols = NULL,nc,col,cnt,*ocols;
1161   PetscInt               *ofill = dd->ofill;
1162   PetscScalar            *values;
1163   DMDABoundaryType       bx;
1164   ISLocalToGlobalMapping ltog,ltogb;
1165   PetscMPIInt            rank,size;
1166 
1167   PetscFunctionBegin;
1168   if (dd->bx == DMDA_BOUNDARY_PERIODIC) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"With fill provided not implemented with periodic boundary conditions");
1169   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)da),&rank);CHKERRQ(ierr);
1170   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRQ(ierr);
1171 
1172   /*
1173          nc - number of components per grid point
1174          col - number of colors needed in one direction for single component problem
1175 
1176   */
1177   ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
1178   col  = 2*s + 1;
1179 
1180   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
1181   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
1182 
1183   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1184   ierr = PetscMalloc2(nx*nc,PetscInt,&cols,nx*nc,PetscInt,&ocols);CHKERRQ(ierr);
1185   ierr = PetscMemzero(cols,nx*nc*sizeof(PetscInt));CHKERRQ(ierr);
1186   ierr = PetscMemzero(ocols,nx*nc*sizeof(PetscInt));CHKERRQ(ierr);
1187 
1188   /*
1189         note should be smaller for first and last process with no periodic
1190         does not handle dfill
1191   */
1192   cnt = 0;
1193   /* coupling with process to the left */
1194   for (i=0; i<s; i++) {
1195     for (j=0; j<nc; j++) {
1196       ocols[cnt] = ((!rank) ? 0 : (s - i)*(ofill[j+1] - ofill[j]));
1197       cols[cnt]  = nc + (s + i)*(ofill[j+1] - ofill[j]);
1198       cnt++;
1199     }
1200   }
1201   for (i=s; i<nx-s; i++) {
1202     for (j=0; j<nc; j++) {
1203       cols[cnt] = nc + 2*s*(ofill[j+1] - ofill[j]);
1204       cnt++;
1205     }
1206   }
1207   /* coupling with process to the right */
1208   for (i=nx-s; i<nx; i++) {
1209     for (j=0; j<nc; j++) {
1210       ocols[cnt] = ((rank == (size-1)) ? 0 : (i - nx + s + 1)*(ofill[j+1] - ofill[j]));
1211       cols[cnt]  = nc + (s + nx - i - 1)*(ofill[j+1] - ofill[j]);
1212       cnt++;
1213     }
1214   }
1215 
1216   ierr = MatSeqAIJSetPreallocation(J,0,cols);CHKERRQ(ierr);
1217   ierr = MatMPIAIJSetPreallocation(J,0,cols,0,ocols);CHKERRQ(ierr);
1218   ierr = PetscFree2(cols,ocols);CHKERRQ(ierr);
1219 
1220   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1221   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1222   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1223   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1224 
1225   /*
1226     For each node in the grid: we get the neighbors in the local (on processor ordering
1227     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1228     PETSc ordering.
1229   */
1230   if (!da->prealloc_only) {
1231     ierr = PetscMalloc(col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1232     ierr = PetscMalloc(col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1233     ierr = PetscMemzero(values,col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1234 
1235     row = xs*nc;
1236     /* coupling with process to the left */
1237     for (i=xs; i<xs+s; i++) {
1238       for (j=0; j<nc; j++) {
1239         cnt = 0;
1240         if (rank) {
1241           for (l=0; l<s; l++) {
1242             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1243           }
1244         }
1245         for (k=0; k<nc; k++) {
1246           cols[cnt++] = i*nc + k;
1247         }
1248         for (l=0; l<s; l++) {
1249           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1250         }
1251         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1252         row++;
1253       }
1254     }
1255     for (i=xs+s; i<xs+nx-s; i++) {
1256       for (j=0; j<nc; j++) {
1257         cnt = 0;
1258         for (l=0; l<s; l++) {
1259           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1260         }
1261         for (k=0; k<nc; k++) {
1262           cols[cnt++] = i*nc + k;
1263         }
1264         for (l=0; l<s; l++) {
1265           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1266         }
1267         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1268         row++;
1269       }
1270     }
1271     /* coupling with process to the right */
1272     for (i=xs+nx-s; i<xs+nx; i++) {
1273       for (j=0; j<nc; j++) {
1274         cnt = 0;
1275         for (l=0; l<s; l++) {
1276           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1277         }
1278         for (k=0; k<nc; k++) {
1279           cols[cnt++] = i*nc + k;
1280         }
1281         if (rank < size-1) {
1282           for (l=0; l<s; l++) {
1283             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1284           }
1285         }
1286         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1287         row++;
1288       }
1289     }
1290     ierr = PetscFree(values);CHKERRQ(ierr);
1291     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1292     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1293     ierr = PetscFree(cols);CHKERRQ(ierr);
1294   }
1295   PetscFunctionReturn(0);
1296 }
1297 
1298 /* ---------------------------------------------------------------------------------*/
1299 
1300 #undef __FUNCT__
1301 #define __FUNCT__ "DMCreateMatrix_DA_1d_MPIAIJ"
1302 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J)
1303 {
1304   PetscErrorCode         ierr;
1305   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1306   PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
1307   PetscInt               istart,iend;
1308   PetscScalar            *values;
1309   DMDABoundaryType       bx;
1310   ISLocalToGlobalMapping ltog,ltogb;
1311 
1312   PetscFunctionBegin;
1313   /*
1314          nc - number of components per grid point
1315          col - number of colors needed in one direction for single component problem
1316 
1317   */
1318   ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
1319   col  = 2*s + 1;
1320 
1321   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
1322   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
1323 
1324   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1325   ierr = MatSeqAIJSetPreallocation(J,col*nc,0);CHKERRQ(ierr);
1326   ierr = MatMPIAIJSetPreallocation(J,col*nc,0,col*nc,0);CHKERRQ(ierr);
1327 
1328   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1329   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1330   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1331   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1332 
1333   /*
1334     For each node in the grid: we get the neighbors in the local (on processor ordering
1335     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1336     PETSc ordering.
1337   */
1338   if (!da->prealloc_only) {
1339     ierr = PetscMalloc2(nc,PetscInt,&rows,col*nc*nc,PetscInt,&cols);CHKERRQ(ierr);
1340     ierr = PetscMalloc(col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1341     ierr = PetscMemzero(values,col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1342     for (i=xs; i<xs+nx; i++) {
1343       istart = PetscMax(-s,gxs - i);
1344       iend   = PetscMin(s,gxs + gnx - i - 1);
1345       slot   = i - gxs;
1346 
1347       cnt = 0;
1348       for (l=0; l<nc; l++) {
1349         for (i1=istart; i1<iend+1; i1++) {
1350           cols[cnt++] = l + nc*(slot + i1);
1351         }
1352         rows[l] = l + nc*(slot);
1353       }
1354       ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1355     }
1356     ierr = PetscFree(values);CHKERRQ(ierr);
1357     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1358     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1359     ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1360   }
1361   PetscFunctionReturn(0);
1362 }
1363 
1364 #undef __FUNCT__
1365 #define __FUNCT__ "DMCreateMatrix_DA_2d_MPIBAIJ"
1366 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
1367 {
1368   PetscErrorCode         ierr;
1369   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1370   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1371   PetscInt               istart,iend,jstart,jend,ii,jj;
1372   MPI_Comm               comm;
1373   PetscScalar            *values;
1374   DMDABoundaryType       bx,by;
1375   DMDAStencilType        st;
1376   ISLocalToGlobalMapping ltog,ltogb;
1377 
1378   PetscFunctionBegin;
1379   /*
1380      nc - number of components per grid point
1381      col - number of colors needed in one direction for single component problem
1382   */
1383   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1384   col  = 2*s + 1;
1385 
1386   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1387   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1388   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1389 
1390   ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1391 
1392   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1393   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1394 
1395   /* determine the matrix preallocation information */
1396   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1397   for (i=xs; i<xs+nx; i++) {
1398     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1399     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1400     for (j=ys; j<ys+ny; j++) {
1401       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1402       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1403       slot   = i - gxs + gnx*(j - gys);
1404 
1405       /* Find block columns in block row */
1406       cnt = 0;
1407       for (ii=istart; ii<iend+1; ii++) {
1408         for (jj=jstart; jj<jend+1; jj++) {
1409           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1410             cols[cnt++] = slot + ii + gnx*jj;
1411           }
1412         }
1413       }
1414       ierr = MatPreallocateSetLocal(ltogb,1,&slot,ltogb,cnt,cols,dnz,onz);CHKERRQ(ierr);
1415     }
1416   }
1417   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1418   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1419   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1420 
1421   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1422   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1423 
1424   /*
1425     For each node in the grid: we get the neighbors in the local (on processor ordering
1426     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1427     PETSc ordering.
1428   */
1429   if (!da->prealloc_only) {
1430     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1431     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1432     for (i=xs; i<xs+nx; i++) {
1433       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1434       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1435       for (j=ys; j<ys+ny; j++) {
1436         jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1437         jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1438         slot = i - gxs + gnx*(j - gys);
1439         cnt  = 0;
1440         for (ii=istart; ii<iend+1; ii++) {
1441           for (jj=jstart; jj<jend+1; jj++) {
1442             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1443               cols[cnt++] = slot + ii + gnx*jj;
1444             }
1445           }
1446         }
1447         ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1448       }
1449     }
1450     ierr = PetscFree(values);CHKERRQ(ierr);
1451     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1452     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1453   }
1454   ierr = PetscFree(cols);CHKERRQ(ierr);
1455   PetscFunctionReturn(0);
1456 }
1457 
1458 #undef __FUNCT__
1459 #define __FUNCT__ "DMCreateMatrix_DA_3d_MPIBAIJ"
1460 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
1461 {
1462   PetscErrorCode         ierr;
1463   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1464   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1465   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1466   MPI_Comm               comm;
1467   PetscScalar            *values;
1468   DMDABoundaryType       bx,by,bz;
1469   DMDAStencilType        st;
1470   ISLocalToGlobalMapping ltog,ltogb;
1471 
1472   PetscFunctionBegin;
1473   /*
1474          nc - number of components per grid point
1475          col - number of colors needed in one direction for single component problem
1476 
1477   */
1478   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1479   col  = 2*s + 1;
1480 
1481   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1482   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1483   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1484 
1485   ierr = PetscMalloc(col*col*col*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1486 
1487   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1488   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1489 
1490   /* determine the matrix preallocation information */
1491   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1492   for (i=xs; i<xs+nx; i++) {
1493     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1494     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1495     for (j=ys; j<ys+ny; j++) {
1496       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1497       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1498       for (k=zs; k<zs+nz; k++) {
1499         kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1500         kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1501 
1502         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1503 
1504         /* Find block columns in block row */
1505         cnt = 0;
1506         for (ii=istart; ii<iend+1; ii++) {
1507           for (jj=jstart; jj<jend+1; jj++) {
1508             for (kk=kstart; kk<kend+1; kk++) {
1509               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1510                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1511               }
1512             }
1513           }
1514         }
1515         ierr = MatPreallocateSetLocal(ltogb,1,&slot,ltogb,cnt,cols,dnz,onz);CHKERRQ(ierr);
1516       }
1517     }
1518   }
1519   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1520   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1521   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1522 
1523   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1524   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1525 
1526   /*
1527     For each node in the grid: we get the neighbors in the local (on processor ordering
1528     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1529     PETSc ordering.
1530   */
1531   if (!da->prealloc_only) {
1532     ierr = PetscMalloc(col*col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1533     ierr = PetscMemzero(values,col*col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1534     for (i=xs; i<xs+nx; i++) {
1535       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1536       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1537       for (j=ys; j<ys+ny; j++) {
1538         jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1539         jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1540         for (k=zs; k<zs+nz; k++) {
1541           kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1542           kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1543 
1544           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1545 
1546           cnt = 0;
1547           for (ii=istart; ii<iend+1; ii++) {
1548             for (jj=jstart; jj<jend+1; jj++) {
1549               for (kk=kstart; kk<kend+1; kk++) {
1550                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1551                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1552                 }
1553               }
1554             }
1555           }
1556           ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1557         }
1558       }
1559     }
1560     ierr = PetscFree(values);CHKERRQ(ierr);
1561     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1562     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1563   }
1564   ierr = PetscFree(cols);CHKERRQ(ierr);
1565   PetscFunctionReturn(0);
1566 }
1567 
1568 #undef __FUNCT__
1569 #define __FUNCT__ "L2GFilterUpperTriangular"
1570 /*
1571   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
1572   identify in the local ordering with periodic domain.
1573 */
1574 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[])
1575 {
1576   PetscErrorCode ierr;
1577   PetscInt       i,n;
1578 
1579   PetscFunctionBegin;
1580   ierr = ISLocalToGlobalMappingApply(ltog,1,row,row);CHKERRQ(ierr);
1581   ierr = ISLocalToGlobalMappingApply(ltog,*cnt,col,col);CHKERRQ(ierr);
1582   for (i=0,n=0; i<*cnt; i++) {
1583     if (col[i] >= *row) col[n++] = col[i];
1584   }
1585   *cnt = n;
1586   PetscFunctionReturn(0);
1587 }
1588 
1589 #undef __FUNCT__
1590 #define __FUNCT__ "DMCreateMatrix_DA_2d_MPISBAIJ"
1591 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J)
1592 {
1593   PetscErrorCode         ierr;
1594   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1595   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1596   PetscInt               istart,iend,jstart,jend,ii,jj;
1597   MPI_Comm               comm;
1598   PetscScalar            *values;
1599   DMDABoundaryType       bx,by;
1600   DMDAStencilType        st;
1601   ISLocalToGlobalMapping ltog,ltogb;
1602 
1603   PetscFunctionBegin;
1604   /*
1605      nc - number of components per grid point
1606      col - number of colors needed in one direction for single component problem
1607   */
1608   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1609   col  = 2*s + 1;
1610 
1611   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1612   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1613   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1614 
1615   ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1616 
1617   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1618   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1619 
1620   /* determine the matrix preallocation information */
1621   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1622   for (i=xs; i<xs+nx; i++) {
1623     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1624     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1625     for (j=ys; j<ys+ny; j++) {
1626       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1627       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1628       slot   = i - gxs + gnx*(j - gys);
1629 
1630       /* Find block columns in block row */
1631       cnt = 0;
1632       for (ii=istart; ii<iend+1; ii++) {
1633         for (jj=jstart; jj<jend+1; jj++) {
1634           if (st == DMDA_STENCIL_BOX || !ii || !jj) {
1635             cols[cnt++] = slot + ii + gnx*jj;
1636           }
1637         }
1638       }
1639       ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1640       ierr = MatPreallocateSymmetricSet(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
1641     }
1642   }
1643   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1644   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1645   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1646 
1647   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1648   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1649 
1650   /*
1651     For each node in the grid: we get the neighbors in the local (on processor ordering
1652     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1653     PETSc ordering.
1654   */
1655   if (!da->prealloc_only) {
1656     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1657     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1658     for (i=xs; i<xs+nx; i++) {
1659       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1660       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1661       for (j=ys; j<ys+ny; j++) {
1662         jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1663         jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1664         slot   = i - gxs + gnx*(j - gys);
1665 
1666         /* Find block columns in block row */
1667         cnt = 0;
1668         for (ii=istart; ii<iend+1; ii++) {
1669           for (jj=jstart; jj<jend+1; jj++) {
1670             if (st == DMDA_STENCIL_BOX || !ii || !jj) {
1671               cols[cnt++] = slot + ii + gnx*jj;
1672             }
1673           }
1674         }
1675         ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1676         ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1677       }
1678     }
1679     ierr = PetscFree(values);CHKERRQ(ierr);
1680     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1681     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1682   }
1683   ierr = PetscFree(cols);CHKERRQ(ierr);
1684   PetscFunctionReturn(0);
1685 }
1686 
1687 #undef __FUNCT__
1688 #define __FUNCT__ "DMCreateMatrix_DA_3d_MPISBAIJ"
1689 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J)
1690 {
1691   PetscErrorCode         ierr;
1692   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1693   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1694   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1695   MPI_Comm               comm;
1696   PetscScalar            *values;
1697   DMDABoundaryType       bx,by,bz;
1698   DMDAStencilType        st;
1699   ISLocalToGlobalMapping ltog,ltogb;
1700 
1701   PetscFunctionBegin;
1702   /*
1703      nc - number of components per grid point
1704      col - number of colors needed in one direction for single component problem
1705   */
1706   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1707   col  = 2*s + 1;
1708 
1709   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1710   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1711   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1712 
1713   /* create the matrix */
1714   ierr = PetscMalloc(col*col*col*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1715 
1716   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1717   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1718 
1719   /* determine the matrix preallocation information */
1720   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1721   for (i=xs; i<xs+nx; i++) {
1722     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1723     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1724     for (j=ys; j<ys+ny; j++) {
1725       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1726       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1727       for (k=zs; k<zs+nz; k++) {
1728         kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1729         kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1730 
1731         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1732 
1733         /* Find block columns in block row */
1734         cnt = 0;
1735         for (ii=istart; ii<iend+1; ii++) {
1736           for (jj=jstart; jj<jend+1; jj++) {
1737             for (kk=kstart; kk<kend+1; kk++) {
1738               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
1739                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1740               }
1741             }
1742           }
1743         }
1744         ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1745         ierr = MatPreallocateSymmetricSet(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
1746       }
1747     }
1748   }
1749   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1750   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1751   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1752 
1753   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1754   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1755 
1756   /*
1757     For each node in the grid: we get the neighbors in the local (on processor ordering
1758     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1759     PETSc ordering.
1760   */
1761   if (!da->prealloc_only) {
1762     ierr = PetscMalloc(col*col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1763     ierr = PetscMemzero(values,col*col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1764     for (i=xs; i<xs+nx; i++) {
1765       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1766       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1767       for (j=ys; j<ys+ny; j++) {
1768         jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1769         jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1770         for (k=zs; k<zs+nz; k++) {
1771           kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1772           kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1773 
1774           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1775 
1776           cnt = 0;
1777           for (ii=istart; ii<iend+1; ii++) {
1778             for (jj=jstart; jj<jend+1; jj++) {
1779               for (kk=kstart; kk<kend+1; kk++) {
1780                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
1781                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1782                 }
1783               }
1784             }
1785           }
1786           ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1787           ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1788         }
1789       }
1790     }
1791     ierr = PetscFree(values);CHKERRQ(ierr);
1792     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1793     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1794   }
1795   ierr = PetscFree(cols);CHKERRQ(ierr);
1796   PetscFunctionReturn(0);
1797 }
1798 
1799 /* ---------------------------------------------------------------------------------*/
1800 
1801 #undef __FUNCT__
1802 #define __FUNCT__ "DMCreateMatrix_DA_3d_MPIAIJ_Fill"
1803 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J)
1804 {
1805   PetscErrorCode         ierr;
1806   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1807   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,l,p,*dnz,*onz;
1808   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1809   DM_DA                  *dd = (DM_DA*)da->data;
1810   PetscInt               ifill_col,*dfill = dd->dfill,*ofill = dd->ofill;
1811   MPI_Comm               comm;
1812   PetscScalar            *values;
1813   DMDABoundaryType       bx,by,bz;
1814   ISLocalToGlobalMapping ltog,ltogb;
1815   DMDAStencilType        st;
1816 
1817   PetscFunctionBegin;
1818   /*
1819          nc - number of components per grid point
1820          col - number of colors needed in one direction for single component problem
1821 
1822   */
1823   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1824   col  = 2*s + 1;
1825   if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
1826                  by 2*stencil_width + 1\n");
1827   if (by == DMDA_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
1828                  by 2*stencil_width + 1\n");
1829   if (bz == DMDA_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
1830                  by 2*stencil_width + 1\n");
1831 
1832   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1833   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1834   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1835 
1836   ierr = PetscMalloc(col*col*col*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1837   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1838   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1839 
1840   /* determine the matrix preallocation information */
1841   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1842 
1843 
1844   for (i=xs; i<xs+nx; i++) {
1845     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1846     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1847     for (j=ys; j<ys+ny; j++) {
1848       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1849       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1850       for (k=zs; k<zs+nz; k++) {
1851         kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1852         kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1853 
1854         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1855 
1856         for (l=0; l<nc; l++) {
1857           cnt = 0;
1858           for (ii=istart; ii<iend+1; ii++) {
1859             for (jj=jstart; jj<jend+1; jj++) {
1860               for (kk=kstart; kk<kend+1; kk++) {
1861                 if (ii || jj || kk) {
1862                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1863                     for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1864                   }
1865                 } else {
1866                   if (dfill) {
1867                     for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1868                   } else {
1869                     for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1870                   }
1871                 }
1872               }
1873             }
1874           }
1875           row  = l + nc*(slot);
1876           ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1877         }
1878       }
1879     }
1880   }
1881   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1882   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1883   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1884   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1885   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1886 
1887   /*
1888     For each node in the grid: we get the neighbors in the local (on processor ordering
1889     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1890     PETSc ordering.
1891   */
1892   if (!da->prealloc_only) {
1893     ierr = PetscMalloc(col*col*col*nc*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1894     ierr = PetscMemzero(values,col*col*col*nc*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1895     for (i=xs; i<xs+nx; i++) {
1896       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1897       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1898       for (j=ys; j<ys+ny; j++) {
1899         jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1900         jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1901         for (k=zs; k<zs+nz; k++) {
1902           kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1903           kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1904 
1905           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1906 
1907           for (l=0; l<nc; l++) {
1908             cnt = 0;
1909             for (ii=istart; ii<iend+1; ii++) {
1910               for (jj=jstart; jj<jend+1; jj++) {
1911                 for (kk=kstart; kk<kend+1; kk++) {
1912                   if (ii || jj || kk) {
1913                     if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1914                       for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1915                     }
1916                   } else {
1917                     if (dfill) {
1918                       for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1919                     } else {
1920                       for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1921                     }
1922                   }
1923                 }
1924               }
1925             }
1926             row  = l + nc*(slot);
1927             ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1928           }
1929         }
1930       }
1931     }
1932     ierr = PetscFree(values);CHKERRQ(ierr);
1933     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1934     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1935   }
1936   ierr = PetscFree(cols);CHKERRQ(ierr);
1937   PetscFunctionReturn(0);
1938 }
1939