xref: /petsc/src/dm/impls/da/fdda.c (revision b00a91154f763f12aa55f3d53a3f2776f15f49e3)
1 
2 #include <petsc-private/dmdaimpl.h> /*I      "petscdmda.h"     I*/
3 #include <petscmat.h>
4 #include <petsc-private/matimpl.h>
5 
6 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring*);
7 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring*);
8 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring*);
9 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring*);
10 
11 /*
12    For ghost i that may be negative or greater than the upper bound this
13   maps it into the 0:m-1 range using periodicity
14 */
15 #define SetInRange(i,m) ((i < 0) ? m+i : ((i >= m) ? i-m : i))
16 
17 #undef __FUNCT__
18 #define __FUNCT__ "DMDASetBlockFills_Private"
19 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill,PetscInt w,PetscInt **rfill)
20 {
21   PetscErrorCode ierr;
22   PetscInt       i,j,nz,*fill;
23 
24   PetscFunctionBegin;
25   if (!dfill) PetscFunctionReturn(0);
26 
27   /* count number nonzeros */
28   nz = 0;
29   for (i=0; i<w; i++) {
30     for (j=0; j<w; j++) {
31       if (dfill[w*i+j]) nz++;
32     }
33   }
34   ierr = PetscMalloc1((nz + w + 1),&fill);CHKERRQ(ierr);
35   /* construct modified CSR storage of nonzero structure */
36   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
37    so fill[1] - fill[0] gives number of nonzeros in first row etc */
38   nz = w + 1;
39   for (i=0; i<w; i++) {
40     fill[i] = nz;
41     for (j=0; j<w; j++) {
42       if (dfill[w*i+j]) {
43         fill[nz] = j;
44         nz++;
45       }
46     }
47   }
48   fill[w] = nz;
49 
50   *rfill = fill;
51   PetscFunctionReturn(0);
52 }
53 
54 #undef __FUNCT__
55 #define __FUNCT__ "DMDASetBlockFills"
56 /*@
57     DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
58     of the matrix returned by DMCreateMatrix().
59 
60     Logically Collective on DMDA
61 
62     Input Parameter:
63 +   da - the distributed array
64 .   dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
65 -   ofill - the fill pattern in the off-diagonal blocks
66 
67 
68     Level: developer
69 
70     Notes: This only makes sense when you are doing multicomponent problems but using the
71        MPIAIJ matrix format
72 
73            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
74        representing coupling and 0 entries for missing coupling. For example
75 $             dfill[9] = {1, 0, 0,
76 $                         1, 1, 0,
77 $                         0, 1, 1}
78        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
79        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
80        diagonal block).
81 
82      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
83      can be represented in the dfill, ofill format
84 
85    Contributed by Glenn Hammond
86 
87 .seealso DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()
88 
89 @*/
90 PetscErrorCode  DMDASetBlockFills(DM da,const PetscInt *dfill,const PetscInt *ofill)
91 {
92   DM_DA          *dd = (DM_DA*)da->data;
93   PetscErrorCode ierr;
94   PetscInt       i,k,cnt = 1;
95 
96   PetscFunctionBegin;
97   ierr = DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);CHKERRQ(ierr);
98   ierr = DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);CHKERRQ(ierr);
99 
100   /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of
101    columns to the left with any nonzeros in them plus 1 */
102   ierr = PetscCalloc1(dd->w,&dd->ofillcols);CHKERRQ(ierr);
103   for (i=0; i<dd->w; i++) {
104     for (k=dd->ofill[i]; k<dd->ofill[i+1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
105   }
106   for (i=0; i<dd->w; i++) {
107     if (dd->ofillcols[i]) {
108       dd->ofillcols[i] = cnt++;
109     }
110   }
111   PetscFunctionReturn(0);
112 }
113 
114 
115 #undef __FUNCT__
116 #define __FUNCT__ "DMCreateColoring_DA"
117 PetscErrorCode  DMCreateColoring_DA(DM da,ISColoringType ctype,ISColoring *coloring)
118 {
119   PetscErrorCode   ierr;
120   PetscInt         dim,m,n,p,nc;
121   DMDABoundaryType bx,by,bz;
122   MPI_Comm         comm;
123   PetscMPIInt      size;
124   PetscBool        isBAIJ;
125   DM_DA            *dd = (DM_DA*)da->data;
126 
127   PetscFunctionBegin;
128   /*
129                                   m
130           ------------------------------------------------------
131          |                                                     |
132          |                                                     |
133          |               ----------------------                |
134          |               |                    |                |
135       n  |           yn  |                    |                |
136          |               |                    |                |
137          |               .---------------------                |
138          |             (xs,ys)     xn                          |
139          |            .                                        |
140          |         (gxs,gys)                                   |
141          |                                                     |
142           -----------------------------------------------------
143   */
144 
145   /*
146          nc - number of components per grid point
147          col - number of colors needed in one direction for single component problem
148 
149   */
150   ierr = DMDAGetInfo(da,&dim,0,0,0,&m,&n,&p,&nc,0,&bx,&by,&bz,0);CHKERRQ(ierr);
151 
152   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
153   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
154   if (ctype == IS_COLORING_GHOSTED) {
155     if (size == 1) {
156       ctype = IS_COLORING_GLOBAL;
157     } else if (dim > 1) {
158       if ((m==1 && bx == DMDA_BOUNDARY_PERIODIC) || (n==1 && by == DMDA_BOUNDARY_PERIODIC) || (p==1 && bz == DMDA_BOUNDARY_PERIODIC)) {
159         SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"IS_COLORING_GHOSTED cannot be used for periodic boundary condition having both ends of the domain  on the same process");
160       }
161     }
162   }
163 
164   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
165      matrices is for the blocks, not the individual matrix elements  */
166   ierr = PetscStrcmp(da->mattype,MATBAIJ,&isBAIJ);CHKERRQ(ierr);
167   if (!isBAIJ) {ierr = PetscStrcmp(da->mattype,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);}
168   if (!isBAIJ) {ierr = PetscStrcmp(da->mattype,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);}
169   if (isBAIJ) {
170     dd->w  = 1;
171     dd->xs = dd->xs/nc;
172     dd->xe = dd->xe/nc;
173     dd->Xs = dd->Xs/nc;
174     dd->Xe = dd->Xe/nc;
175   }
176 
177   /*
178      We do not provide a getcoloring function in the DMDA operations because
179    the basic DMDA does not know about matrices. We think of DMDA as being more
180    more low-level then matrices.
181   */
182   if (dim == 1) {
183     ierr = DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
184   } else if (dim == 2) {
185     ierr =  DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
186   } else if (dim == 3) {
187     ierr =  DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
188   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
189   if (isBAIJ) {
190     dd->w  = nc;
191     dd->xs = dd->xs*nc;
192     dd->xe = dd->xe*nc;
193     dd->Xs = dd->Xs*nc;
194     dd->Xe = dd->Xe*nc;
195   }
196   PetscFunctionReturn(0);
197 }
198 
199 /* ---------------------------------------------------------------------------------*/
200 
201 #undef __FUNCT__
202 #define __FUNCT__ "DMCreateColoring_DA_2d_MPIAIJ"
203 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
204 {
205   PetscErrorCode   ierr;
206   PetscInt         xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
207   PetscInt         ncolors;
208   MPI_Comm         comm;
209   DMDABoundaryType bx,by;
210   DMDAStencilType  st;
211   ISColoringValue  *colors;
212   DM_DA            *dd = (DM_DA*)da->data;
213 
214   PetscFunctionBegin;
215   /*
216          nc - number of components per grid point
217          col - number of colors needed in one direction for single component problem
218 
219   */
220   ierr = DMDAGetInfo(da,&dim,&m,&n,0,&M,&N,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
221   col  = 2*s + 1;
222   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
223   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
224   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
225 
226   /* special case as taught to us by Paul Hovland */
227   if (st == DMDA_STENCIL_STAR && s == 1) {
228     ierr = DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
229   } else {
230 
231     if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X (%d) is divisible\n\
232                                                             by 2*stencil_width + 1 (%d)\n", m, col);
233     if (by == DMDA_BOUNDARY_PERIODIC && (n % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y (%d) is divisible\n\
234                                                             by 2*stencil_width + 1 (%d)\n", n, col);
235     if (ctype == IS_COLORING_GLOBAL) {
236       if (!dd->localcoloring) {
237         ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr);
238         ii   = 0;
239         for (j=ys; j<ys+ny; j++) {
240           for (i=xs; i<xs+nx; i++) {
241             for (k=0; k<nc; k++) {
242               colors[ii++] = k + nc*((i % col) + col*(j % col));
243             }
244           }
245         }
246         ncolors = nc + nc*(col-1 + col*(col-1));
247         ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,&dd->localcoloring);CHKERRQ(ierr);
248       }
249       *coloring = dd->localcoloring;
250     } else if (ctype == IS_COLORING_GHOSTED) {
251       if (!dd->ghostedcoloring) {
252         ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr);
253         ii   = 0;
254         for (j=gys; j<gys+gny; j++) {
255           for (i=gxs; i<gxs+gnx; i++) {
256             for (k=0; k<nc; k++) {
257               /* the complicated stuff is to handle periodic boundaries */
258               colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
259             }
260           }
261         }
262         ncolors = nc + nc*(col - 1 + col*(col-1));
263         ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
264         /* PetscIntView(ncolors,(PetscInt*)colors,0); */
265 
266         ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
267       }
268       *coloring = dd->ghostedcoloring;
269     } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
270   }
271   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
272   PetscFunctionReturn(0);
273 }
274 
275 /* ---------------------------------------------------------------------------------*/
276 
277 #undef __FUNCT__
278 #define __FUNCT__ "DMCreateColoring_DA_3d_MPIAIJ"
279 PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
280 {
281   PetscErrorCode   ierr;
282   PetscInt         xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
283   PetscInt         ncolors;
284   MPI_Comm         comm;
285   DMDABoundaryType bx,by,bz;
286   DMDAStencilType  st;
287   ISColoringValue  *colors;
288   DM_DA            *dd = (DM_DA*)da->data;
289 
290   PetscFunctionBegin;
291   /*
292          nc - number of components per grid point
293          col - number of colors needed in one direction for single component problem
294 
295   */
296   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
297   col  = 2*s + 1;
298   if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
299                                                          by 2*stencil_width + 1\n");
300   if (by == DMDA_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
301                                                          by 2*stencil_width + 1\n");
302   if (bz == DMDA_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
303                                                          by 2*stencil_width + 1\n");
304 
305   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
306   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
307   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
308 
309   /* create the coloring */
310   if (ctype == IS_COLORING_GLOBAL) {
311     if (!dd->localcoloring) {
312       ierr = PetscMalloc1(nc*nx*ny*nz,&colors);CHKERRQ(ierr);
313       ii   = 0;
314       for (k=zs; k<zs+nz; k++) {
315         for (j=ys; j<ys+ny; j++) {
316           for (i=xs; i<xs+nx; i++) {
317             for (l=0; l<nc; l++) {
318               colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
319             }
320           }
321         }
322       }
323       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
324       ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,&dd->localcoloring);CHKERRQ(ierr);
325     }
326     *coloring = dd->localcoloring;
327   } else if (ctype == IS_COLORING_GHOSTED) {
328     if (!dd->ghostedcoloring) {
329       ierr = PetscMalloc1(nc*gnx*gny*gnz,&colors);CHKERRQ(ierr);
330       ii   = 0;
331       for (k=gzs; k<gzs+gnz; k++) {
332         for (j=gys; j<gys+gny; j++) {
333           for (i=gxs; i<gxs+gnx; i++) {
334             for (l=0; l<nc; l++) {
335               /* the complicated stuff is to handle periodic boundaries */
336               colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
337             }
338           }
339         }
340       }
341       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
342       ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
343       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
344     }
345     *coloring = dd->ghostedcoloring;
346   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
347   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
348   PetscFunctionReturn(0);
349 }
350 
351 /* ---------------------------------------------------------------------------------*/
352 
353 #undef __FUNCT__
354 #define __FUNCT__ "DMCreateColoring_DA_1d_MPIAIJ"
355 PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
356 {
357   PetscErrorCode   ierr;
358   PetscInt         xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
359   PetscInt         ncolors;
360   MPI_Comm         comm;
361   DMDABoundaryType bx;
362   ISColoringValue  *colors;
363   DM_DA            *dd = (DM_DA*)da->data;
364 
365   PetscFunctionBegin;
366   /*
367          nc - number of components per grid point
368          col - number of colors needed in one direction for single component problem
369 
370   */
371   ierr = DMDAGetInfo(da,&dim,&m,0,0,&M,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
372   col  = 2*s + 1;
373 
374   if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points %d is divisible\n\
375                                                           by 2*stencil_width + 1 %d\n",(int)m,(int)col);
376 
377   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
378   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
379   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
380 
381   /* create the coloring */
382   if (ctype == IS_COLORING_GLOBAL) {
383     if (!dd->localcoloring) {
384       ierr = PetscMalloc1(nc*nx,&colors);CHKERRQ(ierr);
385       if (dd->ofillcols) {
386         PetscInt tc = 0;
387         for (i=0; i<nc; i++) tc += (PetscInt) (dd->ofillcols[i] > 0);
388         i1 = 0;
389         for (i=xs; i<xs+nx; i++) {
390           for (l=0; l<nc; l++) {
391             if (dd->ofillcols[l] && (i % col)) {
392               colors[i1++] =  nc - 1 + tc*((i % col) - 1) + dd->ofillcols[l];
393             } else {
394               colors[i1++] = l;
395             }
396           }
397         }
398         ncolors = nc + 2*s*tc;
399       } else {
400         i1 = 0;
401         for (i=xs; i<xs+nx; i++) {
402           for (l=0; l<nc; l++) {
403             colors[i1++] = l + nc*(i % col);
404           }
405         }
406         ncolors = nc + nc*(col-1);
407       }
408       ierr = ISColoringCreate(comm,ncolors,nc*nx,colors,&dd->localcoloring);CHKERRQ(ierr);
409     }
410     *coloring = dd->localcoloring;
411   } else if (ctype == IS_COLORING_GHOSTED) {
412     if (!dd->ghostedcoloring) {
413       ierr = PetscMalloc1(nc*gnx,&colors);CHKERRQ(ierr);
414       i1   = 0;
415       for (i=gxs; i<gxs+gnx; i++) {
416         for (l=0; l<nc; l++) {
417           /* the complicated stuff is to handle periodic boundaries */
418           colors[i1++] = l + nc*(SetInRange(i,m) % col);
419         }
420       }
421       ncolors = nc + nc*(col-1);
422       ierr    = ISColoringCreate(comm,ncolors,nc*gnx,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
423       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
424     }
425     *coloring = dd->ghostedcoloring;
426   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
427   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
428   PetscFunctionReturn(0);
429 }
430 
431 #undef __FUNCT__
432 #define __FUNCT__ "DMCreateColoring_DA_2d_5pt_MPIAIJ"
433 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
434 {
435   PetscErrorCode   ierr;
436   PetscInt         xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
437   PetscInt         ncolors;
438   MPI_Comm         comm;
439   DMDABoundaryType bx,by;
440   ISColoringValue  *colors;
441   DM_DA            *dd = (DM_DA*)da->data;
442 
443   PetscFunctionBegin;
444   /*
445          nc - number of components per grid point
446          col - number of colors needed in one direction for single component problem
447 
448   */
449   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,0);CHKERRQ(ierr);
450   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
451   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
452   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
453 
454   if (bx == DMDA_BOUNDARY_PERIODIC && (m % 5)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible by 5\n");
455   if (by == DMDA_BOUNDARY_PERIODIC && (n % 5)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible by 5\n");
456 
457   /* create the coloring */
458   if (ctype == IS_COLORING_GLOBAL) {
459     if (!dd->localcoloring) {
460       ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr);
461       ii   = 0;
462       for (j=ys; j<ys+ny; j++) {
463         for (i=xs; i<xs+nx; i++) {
464           for (k=0; k<nc; k++) {
465             colors[ii++] = k + nc*((3*j+i) % 5);
466           }
467         }
468       }
469       ncolors = 5*nc;
470       ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,&dd->localcoloring);CHKERRQ(ierr);
471     }
472     *coloring = dd->localcoloring;
473   } else if (ctype == IS_COLORING_GHOSTED) {
474     if (!dd->ghostedcoloring) {
475       ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr);
476       ii = 0;
477       for (j=gys; j<gys+gny; j++) {
478         for (i=gxs; i<gxs+gnx; i++) {
479           for (k=0; k<nc; k++) {
480             colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
481           }
482         }
483       }
484       ncolors = 5*nc;
485       ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
486       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
487     }
488     *coloring = dd->ghostedcoloring;
489   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
490   PetscFunctionReturn(0);
491 }
492 
493 /* =========================================================================== */
494 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat);
495 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM,Mat);
496 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat);
497 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat);
498 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat);
499 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat);
500 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat);
501 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat);
502 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat);
503 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat);
504 
505 #undef __FUNCT__
506 #define __FUNCT__ "MatSetupDM"
507 /*@C
508    MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix
509 
510    Logically Collective on Mat
511 
512    Input Parameters:
513 +  mat - the matrix
514 -  da - the da
515 
516    Level: intermediate
517 
518 @*/
519 PetscErrorCode MatSetupDM(Mat mat,DM da)
520 {
521   PetscErrorCode ierr;
522 
523   PetscFunctionBegin;
524   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
525   PetscValidHeaderSpecific(da,DM_CLASSID,1);
526   ierr = PetscTryMethod(mat,"MatSetupDM_C",(Mat,DM),(mat,da));CHKERRQ(ierr);
527   PetscFunctionReturn(0);
528 }
529 
530 #undef __FUNCT__
531 #define __FUNCT__ "MatView_MPI_DA"
532 PetscErrorCode  MatView_MPI_DA(Mat A,PetscViewer viewer)
533 {
534   DM                da;
535   PetscErrorCode    ierr;
536   const char        *prefix;
537   Mat               Anatural;
538   AO                ao;
539   PetscInt          rstart,rend,*petsc,i;
540   IS                is;
541   MPI_Comm          comm;
542   PetscViewerFormat format;
543 
544   PetscFunctionBegin;
545   /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
546   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
547   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0);
548 
549   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
550   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
551   if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
552 
553   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
554   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
555   ierr = PetscMalloc1((rend-rstart),&petsc);CHKERRQ(ierr);
556   for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
557   ierr = AOApplicationToPetsc(ao,rend-rstart,petsc);CHKERRQ(ierr);
558   ierr = ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
559 
560   /* call viewer on natural ordering */
561   ierr = MatGetSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);CHKERRQ(ierr);
562   ierr = ISDestroy(&is);CHKERRQ(ierr);
563   ierr = PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);CHKERRQ(ierr);
564   ierr = PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);CHKERRQ(ierr);
565   ierr = PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);CHKERRQ(ierr);
566   ierr = MatView(Anatural,viewer);CHKERRQ(ierr);
567   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
568   PetscFunctionReturn(0);
569 }
570 
571 #undef __FUNCT__
572 #define __FUNCT__ "MatLoad_MPI_DA"
573 PetscErrorCode  MatLoad_MPI_DA(Mat A,PetscViewer viewer)
574 {
575   DM             da;
576   PetscErrorCode ierr;
577   Mat            Anatural,Aapp;
578   AO             ao;
579   PetscInt       rstart,rend,*app,i;
580   IS             is;
581   MPI_Comm       comm;
582 
583   PetscFunctionBegin;
584   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
585   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
586   if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
587 
588   /* Load the matrix in natural ordering */
589   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Anatural);CHKERRQ(ierr);
590   ierr = MatSetType(Anatural,((PetscObject)A)->type_name);CHKERRQ(ierr);
591   ierr = MatSetSizes(Anatural,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
592   ierr = MatLoad(Anatural,viewer);CHKERRQ(ierr);
593 
594   /* Map natural ordering to application ordering and create IS */
595   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
596   ierr = MatGetOwnershipRange(Anatural,&rstart,&rend);CHKERRQ(ierr);
597   ierr = PetscMalloc1((rend-rstart),&app);CHKERRQ(ierr);
598   for (i=rstart; i<rend; i++) app[i-rstart] = i;
599   ierr = AOPetscToApplication(ao,rend-rstart,app);CHKERRQ(ierr);
600   ierr = ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
601 
602   /* Do permutation and replace header */
603   ierr = MatGetSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);CHKERRQ(ierr);
604   ierr = MatHeaderReplace(A,Aapp);CHKERRQ(ierr);
605   ierr = ISDestroy(&is);CHKERRQ(ierr);
606   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
607   PetscFunctionReturn(0);
608 }
609 
610 #undef __FUNCT__
611 #define __FUNCT__ "DMCreateMatrix_DA"
612 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
613 {
614   PetscErrorCode ierr;
615   PetscInt       dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
616   Mat            A;
617   MPI_Comm       comm;
618   MatType        Atype;
619   PetscSection   section, sectionGlobal;
620   void           (*aij)(void)=NULL,(*baij)(void)=NULL,(*sbaij)(void)=NULL;
621   MatType        mtype;
622   PetscMPIInt    size;
623   DM_DA          *dd = (DM_DA*)da->data;
624 
625   PetscFunctionBegin;
626   ierr = MatInitializePackage();CHKERRQ(ierr);
627   mtype = da->mattype;
628 
629   ierr = DMGetDefaultSection(da, &section);CHKERRQ(ierr);
630   if (section) {
631     PetscInt  bs = -1;
632     PetscInt  localSize;
633     PetscBool isShell, isBlock, isSeqBlock, isMPIBlock, isSymBlock, isSymSeqBlock, isSymMPIBlock, isSymmetric;
634 
635     ierr = DMGetDefaultGlobalSection(da, &sectionGlobal);CHKERRQ(ierr);
636     ierr = PetscSectionGetConstrainedStorageSize(sectionGlobal, &localSize);CHKERRQ(ierr);
637     ierr = MatCreate(PetscObjectComm((PetscObject)da), J);CHKERRQ(ierr);
638     ierr = MatSetSizes(*J, localSize, localSize, PETSC_DETERMINE, PETSC_DETERMINE);CHKERRQ(ierr);
639     ierr = MatSetType(*J, mtype);CHKERRQ(ierr);
640     ierr = MatSetFromOptions(*J);CHKERRQ(ierr);
641     ierr = PetscStrcmp(mtype, MATSHELL, &isShell);CHKERRQ(ierr);
642     ierr = PetscStrcmp(mtype, MATBAIJ, &isBlock);CHKERRQ(ierr);
643     ierr = PetscStrcmp(mtype, MATSEQBAIJ, &isSeqBlock);CHKERRQ(ierr);
644     ierr = PetscStrcmp(mtype, MATMPIBAIJ, &isMPIBlock);CHKERRQ(ierr);
645     ierr = PetscStrcmp(mtype, MATSBAIJ, &isSymBlock);CHKERRQ(ierr);
646     ierr = PetscStrcmp(mtype, MATSEQSBAIJ, &isSymSeqBlock);CHKERRQ(ierr);
647     ierr = PetscStrcmp(mtype, MATMPISBAIJ, &isSymMPIBlock);CHKERRQ(ierr);
648     /* Check for symmetric storage */
649     isSymmetric = (PetscBool) (isSymBlock || isSymSeqBlock || isSymMPIBlock);
650     if (isSymmetric) {
651       ierr = MatSetOption(*J, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE);CHKERRQ(ierr);
652     }
653     if (!isShell) {
654       PetscInt *dnz, *onz, *dnzu, *onzu, bsLocal;
655 
656       if (bs < 0) {
657         if (isBlock || isSeqBlock || isMPIBlock || isSymBlock || isSymSeqBlock || isSymMPIBlock) {
658           PetscInt pStart, pEnd, p, dof;
659 
660           ierr = PetscSectionGetChart(sectionGlobal, &pStart, &pEnd);CHKERRQ(ierr);
661           for (p = pStart; p < pEnd; ++p) {
662             ierr = PetscSectionGetDof(sectionGlobal, p, &dof);CHKERRQ(ierr);
663             if (dof) {
664               bs = dof;
665               break;
666             }
667           }
668         } else {
669           bs = 1;
670         }
671         /* Must have same blocksize on all procs (some might have no points) */
672         bsLocal = bs;
673         ierr    = MPI_Allreduce(&bsLocal, &bs, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)da));CHKERRQ(ierr);
674       }
675       ierr = PetscCalloc4(localSize/bs, &dnz, localSize/bs, &onz, localSize/bs, &dnzu, localSize/bs, &onzu);CHKERRQ(ierr);
676       /* ierr = DMPlexPreallocateOperator(dm, bs, section, sectionGlobal, dnz, onz, dnzu, onzu, *J, fillMatrix);CHKERRQ(ierr); */
677       ierr = PetscFree4(dnz, onz, dnzu, onzu);CHKERRQ(ierr);
678     }
679   }
680   /*
681                                   m
682           ------------------------------------------------------
683          |                                                     |
684          |                                                     |
685          |               ----------------------                |
686          |               |                    |                |
687       n  |           ny  |                    |                |
688          |               |                    |                |
689          |               .---------------------                |
690          |             (xs,ys)     nx                          |
691          |            .                                        |
692          |         (gxs,gys)                                   |
693          |                                                     |
694           -----------------------------------------------------
695   */
696 
697   /*
698          nc - number of components per grid point
699          col - number of colors needed in one direction for single component problem
700 
701   */
702   M   = dd->M;
703   N   = dd->N;
704   P   = dd->P;
705   dim = dd->dim;
706   dof = dd->w;
707   /* ierr = DMDAGetInfo(da,&dim,&M,&N,&P,0,0,0,&dof,0,0,0,0,0);CHKERRQ(ierr); */
708   ierr = DMDAGetCorners(da,0,0,0,&nx,&ny,&nz);CHKERRQ(ierr);
709   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
710   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
711   ierr = MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);CHKERRQ(ierr);
712   ierr = MatSetType(A,mtype);CHKERRQ(ierr);
713   ierr = MatSetDM(A,da);CHKERRQ(ierr);
714   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
715   ierr = MatGetType(A,&Atype);CHKERRQ(ierr);
716   /*
717      We do not provide a getmatrix function in the DMDA operations because
718    the basic DMDA does not know about matrices. We think of DMDA as being more
719    more low-level than matrices. This is kind of cheating but, cause sometimes
720    we think of DMDA has higher level than matrices.
721 
722      We could switch based on Atype (or mtype), but we do not since the
723    specialized setting routines depend only the particular preallocation
724    details of the matrix, not the type itself.
725   */
726   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
727   if (!aij) {
728     ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
729   }
730   if (!aij) {
731     ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
732     if (!baij) {
733       ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
734     }
735     if (!baij) {
736       ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
737       if (!sbaij) {
738         ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
739       }
740     }
741   }
742   if (aij) {
743     if (dim == 1) {
744       if (dd->ofill) {
745         ierr = DMCreateMatrix_DA_1d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
746       } else {
747         ierr = DMCreateMatrix_DA_1d_MPIAIJ(da,A);CHKERRQ(ierr);
748       }
749     } else if (dim == 2) {
750       if (dd->ofill) {
751         ierr = DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
752       } else {
753         ierr = DMCreateMatrix_DA_2d_MPIAIJ(da,A);CHKERRQ(ierr);
754       }
755     } else if (dim == 3) {
756       if (dd->ofill) {
757         ierr = DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
758       } else {
759         ierr = DMCreateMatrix_DA_3d_MPIAIJ(da,A);CHKERRQ(ierr);
760       }
761     }
762   } else if (baij) {
763     if (dim == 2) {
764       ierr = DMCreateMatrix_DA_2d_MPIBAIJ(da,A);CHKERRQ(ierr);
765     } else if (dim == 3) {
766       ierr = DMCreateMatrix_DA_3d_MPIBAIJ(da,A);CHKERRQ(ierr);
767     } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
768   } else if (sbaij) {
769     if (dim == 2) {
770       ierr = DMCreateMatrix_DA_2d_MPISBAIJ(da,A);CHKERRQ(ierr);
771     } else if (dim == 3) {
772       ierr = DMCreateMatrix_DA_3d_MPISBAIJ(da,A);CHKERRQ(ierr);
773     } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
774   } else {
775     ISLocalToGlobalMapping ltog,ltogb;
776     ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
777     ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
778     ierr = MatSetUp(A);CHKERRQ(ierr);
779     ierr = MatSetLocalToGlobalMapping(A,ltog,ltog);CHKERRQ(ierr);
780     ierr = MatSetLocalToGlobalMappingBlock(A,ltogb,ltogb);CHKERRQ(ierr);
781   }
782   ierr = DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);CHKERRQ(ierr);
783   ierr = MatSetStencil(A,dim,dims,starts,dof);CHKERRQ(ierr);
784   ierr = MatSetDM(A,da);CHKERRQ(ierr);
785   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
786   if (size > 1) {
787     /* change viewer to display matrix in natural ordering */
788     ierr = MatShellSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA);CHKERRQ(ierr);
789     ierr = MatShellSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA);CHKERRQ(ierr);
790   }
791   *J = A;
792   PetscFunctionReturn(0);
793 }
794 
795 /* ---------------------------------------------------------------------------------*/
796 #undef __FUNCT__
797 #define __FUNCT__ "DMCreateMatrix_DA_2d_MPIAIJ"
798 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J)
799 {
800   PetscErrorCode         ierr;
801   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p;
802   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
803   MPI_Comm               comm;
804   PetscScalar            *values;
805   DMDABoundaryType       bx,by;
806   ISLocalToGlobalMapping ltog,ltogb;
807   DMDAStencilType        st;
808 
809   PetscFunctionBegin;
810   /*
811          nc - number of components per grid point
812          col - number of colors needed in one direction for single component problem
813 
814   */
815   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
816   col  = 2*s + 1;
817   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
818   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
819   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
820 
821   ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr);
822   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
823   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
824 
825   /* determine the matrix preallocation information */
826   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
827   for (i=xs; i<xs+nx; i++) {
828 
829     pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
830     pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
831 
832     for (j=ys; j<ys+ny; j++) {
833       slot = i - gxs + gnx*(j - gys);
834 
835       lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
836       lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
837 
838       cnt = 0;
839       for (k=0; k<nc; k++) {
840         for (l=lstart; l<lend+1; l++) {
841           for (p=pstart; p<pend+1; p++) {
842             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
843               cols[cnt++] = k + nc*(slot + gnx*l + p);
844             }
845           }
846         }
847         rows[k] = k + nc*(slot);
848       }
849       ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
850     }
851   }
852   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
853   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
854   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
855   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
856 
857   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
858   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
859 
860   /*
861     For each node in the grid: we get the neighbors in the local (on processor ordering
862     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
863     PETSc ordering.
864   */
865   if (!da->prealloc_only) {
866     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
867     for (i=xs; i<xs+nx; i++) {
868 
869       pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
870       pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
871 
872       for (j=ys; j<ys+ny; j++) {
873         slot = i - gxs + gnx*(j - gys);
874 
875         lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
876         lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
877 
878         cnt = 0;
879         for (k=0; k<nc; k++) {
880           for (l=lstart; l<lend+1; l++) {
881             for (p=pstart; p<pend+1; p++) {
882               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
883                 cols[cnt++] = k + nc*(slot + gnx*l + p);
884               }
885             }
886           }
887           rows[k] = k + nc*(slot);
888         }
889         ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
890       }
891     }
892     ierr = PetscFree(values);CHKERRQ(ierr);
893     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
894     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
895   }
896   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
897   PetscFunctionReturn(0);
898 }
899 
900 #undef __FUNCT__
901 #define __FUNCT__ "DMCreateMatrix_DA_2d_MPIAIJ_Fill"
902 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)
903 {
904   PetscErrorCode         ierr;
905   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
906   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,l,p;
907   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
908   DM_DA                  *dd = (DM_DA*)da->data;
909   PetscInt               ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
910   MPI_Comm               comm;
911   PetscScalar            *values;
912   DMDABoundaryType       bx,by;
913   ISLocalToGlobalMapping ltog,ltogb;
914   DMDAStencilType        st;
915 
916   PetscFunctionBegin;
917   /*
918          nc - number of components per grid point
919          col - number of colors needed in one direction for single component problem
920 
921   */
922   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
923   col  = 2*s + 1;
924   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
925   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
926   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
927 
928   ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr);
929   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
930   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
931 
932   /* determine the matrix preallocation information */
933   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
934   for (i=xs; i<xs+nx; i++) {
935 
936     pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
937     pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
938 
939     for (j=ys; j<ys+ny; j++) {
940       slot = i - gxs + gnx*(j - gys);
941 
942       lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
943       lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
944 
945       for (k=0; k<nc; k++) {
946         cnt = 0;
947         for (l=lstart; l<lend+1; l++) {
948           for (p=pstart; p<pend+1; p++) {
949             if (l || p) {
950               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
951                 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
952               }
953             } else {
954               if (dfill) {
955                 for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
956               } else {
957                 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
958               }
959             }
960           }
961         }
962         row  = k + nc*(slot);
963         ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
964       }
965     }
966   }
967   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
968   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
969   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
970   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
971   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
972 
973   /*
974     For each node in the grid: we get the neighbors in the local (on processor ordering
975     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
976     PETSc ordering.
977   */
978   if (!da->prealloc_only) {
979     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
980     for (i=xs; i<xs+nx; i++) {
981 
982       pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
983       pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
984 
985       for (j=ys; j<ys+ny; j++) {
986         slot = i - gxs + gnx*(j - gys);
987 
988         lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
989         lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
990 
991         for (k=0; k<nc; k++) {
992           cnt = 0;
993           for (l=lstart; l<lend+1; l++) {
994             for (p=pstart; p<pend+1; p++) {
995               if (l || p) {
996                 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
997                   for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
998                 }
999               } else {
1000                 if (dfill) {
1001                   for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1002                 } else {
1003                   for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1004                 }
1005               }
1006             }
1007           }
1008           row  = k + nc*(slot);
1009           ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1010         }
1011       }
1012     }
1013     ierr = PetscFree(values);CHKERRQ(ierr);
1014     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1015     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1016   }
1017   ierr = PetscFree(cols);CHKERRQ(ierr);
1018   PetscFunctionReturn(0);
1019 }
1020 
1021 /* ---------------------------------------------------------------------------------*/
1022 
1023 #undef __FUNCT__
1024 #define __FUNCT__ "DMCreateMatrix_DA_3d_MPIAIJ"
1025 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J)
1026 {
1027   PetscErrorCode         ierr;
1028   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1029   PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
1030   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1031   MPI_Comm               comm;
1032   PetscScalar            *values;
1033   DMDABoundaryType       bx,by,bz;
1034   ISLocalToGlobalMapping ltog,ltogb;
1035   DMDAStencilType        st;
1036 
1037   PetscFunctionBegin;
1038   /*
1039          nc - number of components per grid point
1040          col - number of colors needed in one direction for single component problem
1041 
1042   */
1043   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1044   col  = 2*s + 1;
1045 
1046   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1047   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1048   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1049 
1050   ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr);
1051   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1052   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1053 
1054   /* determine the matrix preallocation information */
1055   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1056   for (i=xs; i<xs+nx; i++) {
1057     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1058     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1059     for (j=ys; j<ys+ny; j++) {
1060       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1061       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1062       for (k=zs; k<zs+nz; k++) {
1063         kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1064         kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1065 
1066         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1067 
1068         cnt = 0;
1069         for (l=0; l<nc; l++) {
1070           for (ii=istart; ii<iend+1; ii++) {
1071             for (jj=jstart; jj<jend+1; jj++) {
1072               for (kk=kstart; kk<kend+1; kk++) {
1073                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1074                   cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1075                 }
1076               }
1077             }
1078           }
1079           rows[l] = l + nc*(slot);
1080         }
1081         ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1082       }
1083     }
1084   }
1085   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1086   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1087   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1088   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1089   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1090   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1091 
1092   /*
1093     For each node in the grid: we get the neighbors in the local (on processor ordering
1094     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1095     PETSc ordering.
1096   */
1097   if (!da->prealloc_only) {
1098     ierr = PetscCalloc1(col*col*col*nc*nc*nc,&values);CHKERRQ(ierr);
1099     for (i=xs; i<xs+nx; i++) {
1100       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1101       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1102       for (j=ys; j<ys+ny; j++) {
1103         jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1104         jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1105         for (k=zs; k<zs+nz; k++) {
1106           kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1107           kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1108 
1109           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1110 
1111           cnt = 0;
1112           for (l=0; l<nc; l++) {
1113             for (ii=istart; ii<iend+1; ii++) {
1114               for (jj=jstart; jj<jend+1; jj++) {
1115                 for (kk=kstart; kk<kend+1; kk++) {
1116                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1117                     cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1118                   }
1119                 }
1120               }
1121             }
1122             rows[l] = l + nc*(slot);
1123           }
1124           ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1125         }
1126       }
1127     }
1128     ierr = PetscFree(values);CHKERRQ(ierr);
1129     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1130     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1131   }
1132   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1133   PetscFunctionReturn(0);
1134 }
1135 
1136 /* ---------------------------------------------------------------------------------*/
1137 
1138 #undef __FUNCT__
1139 #define __FUNCT__ "DMCreateMatrix_DA_1d_MPIAIJ_Fill"
1140 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J)
1141 {
1142   PetscErrorCode         ierr;
1143   DM_DA                  *dd = (DM_DA*)da->data;
1144   PetscInt               xs,nx,i,j,gxs,gnx,row,k,l;
1145   PetscInt               m,dim,s,*cols = NULL,nc,col,cnt,*ocols;
1146   PetscInt               *ofill = dd->ofill;
1147   PetscScalar            *values;
1148   DMDABoundaryType       bx;
1149   ISLocalToGlobalMapping ltog,ltogb;
1150   PetscMPIInt            rank,size;
1151 
1152   PetscFunctionBegin;
1153   if (dd->bx == DMDA_BOUNDARY_PERIODIC) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"With fill provided not implemented with periodic boundary conditions");
1154   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)da),&rank);CHKERRQ(ierr);
1155   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRQ(ierr);
1156 
1157   /*
1158          nc - number of components per grid point
1159          col - number of colors needed in one direction for single component problem
1160 
1161   */
1162   ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
1163   col  = 2*s + 1;
1164 
1165   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
1166   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
1167 
1168   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1169   ierr = PetscCalloc2(nx*nc,&cols,nx*nc,&ocols);CHKERRQ(ierr);
1170 
1171   /*
1172         note should be smaller for first and last process with no periodic
1173         does not handle dfill
1174   */
1175   cnt = 0;
1176   /* coupling with process to the left */
1177   for (i=0; i<s; i++) {
1178     for (j=0; j<nc; j++) {
1179       ocols[cnt] = ((!rank) ? 0 : (s - i)*(ofill[j+1] - ofill[j]));
1180       cols[cnt]  = nc + (s + i)*(ofill[j+1] - ofill[j]);
1181       cnt++;
1182     }
1183   }
1184   for (i=s; i<nx-s; i++) {
1185     for (j=0; j<nc; j++) {
1186       cols[cnt] = nc + 2*s*(ofill[j+1] - ofill[j]);
1187       cnt++;
1188     }
1189   }
1190   /* coupling with process to the right */
1191   for (i=nx-s; i<nx; i++) {
1192     for (j=0; j<nc; j++) {
1193       ocols[cnt] = ((rank == (size-1)) ? 0 : (i - nx + s + 1)*(ofill[j+1] - ofill[j]));
1194       cols[cnt]  = nc + (s + nx - i - 1)*(ofill[j+1] - ofill[j]);
1195       cnt++;
1196     }
1197   }
1198 
1199   ierr = MatSeqAIJSetPreallocation(J,0,cols);CHKERRQ(ierr);
1200   ierr = MatMPIAIJSetPreallocation(J,0,cols,0,ocols);CHKERRQ(ierr);
1201   ierr = PetscFree2(cols,ocols);CHKERRQ(ierr);
1202 
1203   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1204   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1205   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1206   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1207 
1208   /*
1209     For each node in the grid: we get the neighbors in the local (on processor ordering
1210     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1211     PETSc ordering.
1212   */
1213   if (!da->prealloc_only) {
1214     ierr = PetscMalloc1(col*nc*nc,&cols);CHKERRQ(ierr);
1215     ierr = PetscCalloc1(col*nc*nc,&values);CHKERRQ(ierr);
1216 
1217     row = xs*nc;
1218     /* coupling with process to the left */
1219     for (i=xs; i<xs+s; i++) {
1220       for (j=0; j<nc; j++) {
1221         cnt = 0;
1222         if (rank) {
1223           for (l=0; l<s; l++) {
1224             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1225           }
1226         }
1227         for (k=0; k<nc; k++) {
1228           cols[cnt++] = i*nc + k;
1229         }
1230         for (l=0; l<s; l++) {
1231           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1232         }
1233         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1234         row++;
1235       }
1236     }
1237     for (i=xs+s; i<xs+nx-s; i++) {
1238       for (j=0; j<nc; j++) {
1239         cnt = 0;
1240         for (l=0; l<s; l++) {
1241           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1242         }
1243         for (k=0; k<nc; k++) {
1244           cols[cnt++] = i*nc + k;
1245         }
1246         for (l=0; l<s; l++) {
1247           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1248         }
1249         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1250         row++;
1251       }
1252     }
1253     /* coupling with process to the right */
1254     for (i=xs+nx-s; i<xs+nx; i++) {
1255       for (j=0; j<nc; j++) {
1256         cnt = 0;
1257         for (l=0; l<s; l++) {
1258           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1259         }
1260         for (k=0; k<nc; k++) {
1261           cols[cnt++] = i*nc + k;
1262         }
1263         if (rank < size-1) {
1264           for (l=0; l<s; l++) {
1265             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1266           }
1267         }
1268         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1269         row++;
1270       }
1271     }
1272     ierr = PetscFree(values);CHKERRQ(ierr);
1273     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1274     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1275     ierr = PetscFree(cols);CHKERRQ(ierr);
1276   }
1277   PetscFunctionReturn(0);
1278 }
1279 
1280 /* ---------------------------------------------------------------------------------*/
1281 
1282 #undef __FUNCT__
1283 #define __FUNCT__ "DMCreateMatrix_DA_1d_MPIAIJ"
1284 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J)
1285 {
1286   PetscErrorCode         ierr;
1287   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1288   PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
1289   PetscInt               istart,iend;
1290   PetscScalar            *values;
1291   DMDABoundaryType       bx;
1292   ISLocalToGlobalMapping ltog,ltogb;
1293 
1294   PetscFunctionBegin;
1295   /*
1296          nc - number of components per grid point
1297          col - number of colors needed in one direction for single component problem
1298 
1299   */
1300   ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
1301   col  = 2*s + 1;
1302 
1303   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
1304   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
1305 
1306   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1307   ierr = MatSeqAIJSetPreallocation(J,col*nc,0);CHKERRQ(ierr);
1308   ierr = MatMPIAIJSetPreallocation(J,col*nc,0,col*nc,0);CHKERRQ(ierr);
1309 
1310   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1311   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1312   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1313   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1314 
1315   /*
1316     For each node in the grid: we get the neighbors in the local (on processor ordering
1317     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1318     PETSc ordering.
1319   */
1320   if (!da->prealloc_only) {
1321     ierr = PetscMalloc2(nc,&rows,col*nc*nc,&cols);CHKERRQ(ierr);
1322     ierr = PetscCalloc1(col*nc*nc,&values);CHKERRQ(ierr);
1323     for (i=xs; i<xs+nx; i++) {
1324       istart = PetscMax(-s,gxs - i);
1325       iend   = PetscMin(s,gxs + gnx - i - 1);
1326       slot   = i - gxs;
1327 
1328       cnt = 0;
1329       for (l=0; l<nc; l++) {
1330         for (i1=istart; i1<iend+1; i1++) {
1331           cols[cnt++] = l + nc*(slot + i1);
1332         }
1333         rows[l] = l + nc*(slot);
1334       }
1335       ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1336     }
1337     ierr = PetscFree(values);CHKERRQ(ierr);
1338     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1339     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1340     ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1341   }
1342   PetscFunctionReturn(0);
1343 }
1344 
1345 #undef __FUNCT__
1346 #define __FUNCT__ "DMCreateMatrix_DA_2d_MPIBAIJ"
1347 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
1348 {
1349   PetscErrorCode         ierr;
1350   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1351   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1352   PetscInt               istart,iend,jstart,jend,ii,jj;
1353   MPI_Comm               comm;
1354   PetscScalar            *values;
1355   DMDABoundaryType       bx,by;
1356   DMDAStencilType        st;
1357   ISLocalToGlobalMapping ltog,ltogb;
1358 
1359   PetscFunctionBegin;
1360   /*
1361      nc - number of components per grid point
1362      col - number of colors needed in one direction for single component problem
1363   */
1364   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1365   col  = 2*s + 1;
1366 
1367   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1368   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1369   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1370 
1371   ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr);
1372 
1373   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1374   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1375 
1376   /* determine the matrix preallocation information */
1377   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1378   for (i=xs; i<xs+nx; i++) {
1379     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1380     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1381     for (j=ys; j<ys+ny; j++) {
1382       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1383       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1384       slot   = i - gxs + gnx*(j - gys);
1385 
1386       /* Find block columns in block row */
1387       cnt = 0;
1388       for (ii=istart; ii<iend+1; ii++) {
1389         for (jj=jstart; jj<jend+1; jj++) {
1390           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1391             cols[cnt++] = slot + ii + gnx*jj;
1392           }
1393         }
1394       }
1395       ierr = MatPreallocateSetLocal(ltogb,1,&slot,ltogb,cnt,cols,dnz,onz);CHKERRQ(ierr);
1396     }
1397   }
1398   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1399   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1400   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1401 
1402   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1403   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1404 
1405   /*
1406     For each node in the grid: we get the neighbors in the local (on processor ordering
1407     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1408     PETSc ordering.
1409   */
1410   if (!da->prealloc_only) {
1411     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
1412     for (i=xs; i<xs+nx; i++) {
1413       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1414       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1415       for (j=ys; j<ys+ny; j++) {
1416         jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1417         jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1418         slot = i - gxs + gnx*(j - gys);
1419         cnt  = 0;
1420         for (ii=istart; ii<iend+1; ii++) {
1421           for (jj=jstart; jj<jend+1; jj++) {
1422             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1423               cols[cnt++] = slot + ii + gnx*jj;
1424             }
1425           }
1426         }
1427         ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1428       }
1429     }
1430     ierr = PetscFree(values);CHKERRQ(ierr);
1431     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1432     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1433   }
1434   ierr = PetscFree(cols);CHKERRQ(ierr);
1435   PetscFunctionReturn(0);
1436 }
1437 
1438 #undef __FUNCT__
1439 #define __FUNCT__ "DMCreateMatrix_DA_3d_MPIBAIJ"
1440 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
1441 {
1442   PetscErrorCode         ierr;
1443   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1444   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1445   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1446   MPI_Comm               comm;
1447   PetscScalar            *values;
1448   DMDABoundaryType       bx,by,bz;
1449   DMDAStencilType        st;
1450   ISLocalToGlobalMapping ltog,ltogb;
1451 
1452   PetscFunctionBegin;
1453   /*
1454          nc - number of components per grid point
1455          col - number of colors needed in one direction for single component problem
1456 
1457   */
1458   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1459   col  = 2*s + 1;
1460 
1461   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1462   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1463   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1464 
1465   ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr);
1466 
1467   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1468   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1469 
1470   /* determine the matrix preallocation information */
1471   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1472   for (i=xs; i<xs+nx; i++) {
1473     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1474     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1475     for (j=ys; j<ys+ny; j++) {
1476       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1477       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1478       for (k=zs; k<zs+nz; k++) {
1479         kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1480         kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1481 
1482         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1483 
1484         /* Find block columns in block row */
1485         cnt = 0;
1486         for (ii=istart; ii<iend+1; ii++) {
1487           for (jj=jstart; jj<jend+1; jj++) {
1488             for (kk=kstart; kk<kend+1; kk++) {
1489               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1490                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1491               }
1492             }
1493           }
1494         }
1495         ierr = MatPreallocateSetLocal(ltogb,1,&slot,ltogb,cnt,cols,dnz,onz);CHKERRQ(ierr);
1496       }
1497     }
1498   }
1499   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1500   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1501   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1502 
1503   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1504   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1505 
1506   /*
1507     For each node in the grid: we get the neighbors in the local (on processor ordering
1508     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1509     PETSc ordering.
1510   */
1511   if (!da->prealloc_only) {
1512     ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr);
1513     for (i=xs; i<xs+nx; i++) {
1514       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1515       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1516       for (j=ys; j<ys+ny; j++) {
1517         jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1518         jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1519         for (k=zs; k<zs+nz; k++) {
1520           kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1521           kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1522 
1523           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1524 
1525           cnt = 0;
1526           for (ii=istart; ii<iend+1; ii++) {
1527             for (jj=jstart; jj<jend+1; jj++) {
1528               for (kk=kstart; kk<kend+1; kk++) {
1529                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1530                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1531                 }
1532               }
1533             }
1534           }
1535           ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1536         }
1537       }
1538     }
1539     ierr = PetscFree(values);CHKERRQ(ierr);
1540     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1541     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1542   }
1543   ierr = PetscFree(cols);CHKERRQ(ierr);
1544   PetscFunctionReturn(0);
1545 }
1546 
1547 #undef __FUNCT__
1548 #define __FUNCT__ "L2GFilterUpperTriangular"
1549 /*
1550   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
1551   identify in the local ordering with periodic domain.
1552 */
1553 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[])
1554 {
1555   PetscErrorCode ierr;
1556   PetscInt       i,n;
1557 
1558   PetscFunctionBegin;
1559   ierr = ISLocalToGlobalMappingApply(ltog,1,row,row);CHKERRQ(ierr);
1560   ierr = ISLocalToGlobalMappingApply(ltog,*cnt,col,col);CHKERRQ(ierr);
1561   for (i=0,n=0; i<*cnt; i++) {
1562     if (col[i] >= *row) col[n++] = col[i];
1563   }
1564   *cnt = n;
1565   PetscFunctionReturn(0);
1566 }
1567 
1568 #undef __FUNCT__
1569 #define __FUNCT__ "DMCreateMatrix_DA_2d_MPISBAIJ"
1570 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J)
1571 {
1572   PetscErrorCode         ierr;
1573   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1574   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1575   PetscInt               istart,iend,jstart,jend,ii,jj;
1576   MPI_Comm               comm;
1577   PetscScalar            *values;
1578   DMDABoundaryType       bx,by;
1579   DMDAStencilType        st;
1580   ISLocalToGlobalMapping ltog,ltogb;
1581 
1582   PetscFunctionBegin;
1583   /*
1584      nc - number of components per grid point
1585      col - number of colors needed in one direction for single component problem
1586   */
1587   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1588   col  = 2*s + 1;
1589 
1590   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1591   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1592   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1593 
1594   ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr);
1595 
1596   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1597   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1598 
1599   /* determine the matrix preallocation information */
1600   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1601   for (i=xs; i<xs+nx; i++) {
1602     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1603     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1604     for (j=ys; j<ys+ny; j++) {
1605       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1606       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1607       slot   = i - gxs + gnx*(j - gys);
1608 
1609       /* Find block columns in block row */
1610       cnt = 0;
1611       for (ii=istart; ii<iend+1; ii++) {
1612         for (jj=jstart; jj<jend+1; jj++) {
1613           if (st == DMDA_STENCIL_BOX || !ii || !jj) {
1614             cols[cnt++] = slot + ii + gnx*jj;
1615           }
1616         }
1617       }
1618       ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1619       ierr = MatPreallocateSymmetricSet(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
1620     }
1621   }
1622   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1623   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1624   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1625 
1626   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1627   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1628 
1629   /*
1630     For each node in the grid: we get the neighbors in the local (on processor ordering
1631     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1632     PETSc ordering.
1633   */
1634   if (!da->prealloc_only) {
1635     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
1636     for (i=xs; i<xs+nx; i++) {
1637       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1638       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1639       for (j=ys; j<ys+ny; j++) {
1640         jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1641         jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1642         slot   = i - gxs + gnx*(j - gys);
1643 
1644         /* Find block columns in block row */
1645         cnt = 0;
1646         for (ii=istart; ii<iend+1; ii++) {
1647           for (jj=jstart; jj<jend+1; jj++) {
1648             if (st == DMDA_STENCIL_BOX || !ii || !jj) {
1649               cols[cnt++] = slot + ii + gnx*jj;
1650             }
1651           }
1652         }
1653         ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1654         ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1655       }
1656     }
1657     ierr = PetscFree(values);CHKERRQ(ierr);
1658     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1659     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1660   }
1661   ierr = PetscFree(cols);CHKERRQ(ierr);
1662   PetscFunctionReturn(0);
1663 }
1664 
1665 #undef __FUNCT__
1666 #define __FUNCT__ "DMCreateMatrix_DA_3d_MPISBAIJ"
1667 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J)
1668 {
1669   PetscErrorCode         ierr;
1670   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1671   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1672   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1673   MPI_Comm               comm;
1674   PetscScalar            *values;
1675   DMDABoundaryType       bx,by,bz;
1676   DMDAStencilType        st;
1677   ISLocalToGlobalMapping ltog,ltogb;
1678 
1679   PetscFunctionBegin;
1680   /*
1681      nc - number of components per grid point
1682      col - number of colors needed in one direction for single component problem
1683   */
1684   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1685   col  = 2*s + 1;
1686 
1687   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1688   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1689   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1690 
1691   /* create the matrix */
1692   ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr);
1693 
1694   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1695   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1696 
1697   /* determine the matrix preallocation information */
1698   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1699   for (i=xs; i<xs+nx; i++) {
1700     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1701     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1702     for (j=ys; j<ys+ny; j++) {
1703       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1704       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1705       for (k=zs; k<zs+nz; k++) {
1706         kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1707         kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1708 
1709         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1710 
1711         /* Find block columns in block row */
1712         cnt = 0;
1713         for (ii=istart; ii<iend+1; ii++) {
1714           for (jj=jstart; jj<jend+1; jj++) {
1715             for (kk=kstart; kk<kend+1; kk++) {
1716               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
1717                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1718               }
1719             }
1720           }
1721         }
1722         ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1723         ierr = MatPreallocateSymmetricSet(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
1724       }
1725     }
1726   }
1727   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1728   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1729   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1730 
1731   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1732   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1733 
1734   /*
1735     For each node in the grid: we get the neighbors in the local (on processor ordering
1736     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1737     PETSc ordering.
1738   */
1739   if (!da->prealloc_only) {
1740     ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr);
1741     for (i=xs; i<xs+nx; i++) {
1742       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1743       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1744       for (j=ys; j<ys+ny; j++) {
1745         jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1746         jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1747         for (k=zs; k<zs+nz; k++) {
1748           kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1749           kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1750 
1751           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1752 
1753           cnt = 0;
1754           for (ii=istart; ii<iend+1; ii++) {
1755             for (jj=jstart; jj<jend+1; jj++) {
1756               for (kk=kstart; kk<kend+1; kk++) {
1757                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
1758                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1759                 }
1760               }
1761             }
1762           }
1763           ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1764           ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1765         }
1766       }
1767     }
1768     ierr = PetscFree(values);CHKERRQ(ierr);
1769     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1770     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1771   }
1772   ierr = PetscFree(cols);CHKERRQ(ierr);
1773   PetscFunctionReturn(0);
1774 }
1775 
1776 /* ---------------------------------------------------------------------------------*/
1777 
1778 #undef __FUNCT__
1779 #define __FUNCT__ "DMCreateMatrix_DA_3d_MPIAIJ_Fill"
1780 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J)
1781 {
1782   PetscErrorCode         ierr;
1783   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1784   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,l,p,*dnz,*onz;
1785   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1786   DM_DA                  *dd = (DM_DA*)da->data;
1787   PetscInt               ifill_col,*dfill = dd->dfill,*ofill = dd->ofill;
1788   MPI_Comm               comm;
1789   PetscScalar            *values;
1790   DMDABoundaryType       bx,by,bz;
1791   ISLocalToGlobalMapping ltog,ltogb;
1792   DMDAStencilType        st;
1793 
1794   PetscFunctionBegin;
1795   /*
1796          nc - number of components per grid point
1797          col - number of colors needed in one direction for single component problem
1798 
1799   */
1800   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1801   col  = 2*s + 1;
1802   if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
1803                  by 2*stencil_width + 1\n");
1804   if (by == DMDA_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
1805                  by 2*stencil_width + 1\n");
1806   if (bz == DMDA_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
1807                  by 2*stencil_width + 1\n");
1808 
1809   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1810   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1811   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1812 
1813   ierr = PetscMalloc1(col*col*col*nc,&cols);CHKERRQ(ierr);
1814   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1815   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1816 
1817   /* determine the matrix preallocation information */
1818   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1819 
1820 
1821   for (i=xs; i<xs+nx; i++) {
1822     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1823     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1824     for (j=ys; j<ys+ny; j++) {
1825       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1826       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1827       for (k=zs; k<zs+nz; k++) {
1828         kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1829         kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1830 
1831         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1832 
1833         for (l=0; l<nc; l++) {
1834           cnt = 0;
1835           for (ii=istart; ii<iend+1; ii++) {
1836             for (jj=jstart; jj<jend+1; jj++) {
1837               for (kk=kstart; kk<kend+1; kk++) {
1838                 if (ii || jj || kk) {
1839                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1840                     for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1841                   }
1842                 } else {
1843                   if (dfill) {
1844                     for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1845                   } else {
1846                     for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1847                   }
1848                 }
1849               }
1850             }
1851           }
1852           row  = l + nc*(slot);
1853           ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1854         }
1855       }
1856     }
1857   }
1858   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1859   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1860   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1861   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1862   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1863 
1864   /*
1865     For each node in the grid: we get the neighbors in the local (on processor ordering
1866     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1867     PETSc ordering.
1868   */
1869   if (!da->prealloc_only) {
1870     ierr = PetscCalloc1(col*col*col*nc*nc*nc,&values);CHKERRQ(ierr);
1871     for (i=xs; i<xs+nx; i++) {
1872       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1873       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1874       for (j=ys; j<ys+ny; j++) {
1875         jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1876         jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1877         for (k=zs; k<zs+nz; k++) {
1878           kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1879           kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1880 
1881           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1882 
1883           for (l=0; l<nc; l++) {
1884             cnt = 0;
1885             for (ii=istart; ii<iend+1; ii++) {
1886               for (jj=jstart; jj<jend+1; jj++) {
1887                 for (kk=kstart; kk<kend+1; kk++) {
1888                   if (ii || jj || kk) {
1889                     if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1890                       for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1891                     }
1892                   } else {
1893                     if (dfill) {
1894                       for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1895                     } else {
1896                       for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1897                     }
1898                   }
1899                 }
1900               }
1901             }
1902             row  = l + nc*(slot);
1903             ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1904           }
1905         }
1906       }
1907     }
1908     ierr = PetscFree(values);CHKERRQ(ierr);
1909     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1910     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1911   }
1912   ierr = PetscFree(cols);CHKERRQ(ierr);
1913   PetscFunctionReturn(0);
1914 }
1915