xref: /petsc/src/dm/impls/da/fdda.c (revision 0acb5beb625c238e4fe77b440ccaf1beb5d3ca8d)
1 
2 #include <petsc-private/dmdaimpl.h> /*I      "petscdmda.h"     I*/
3 #include <petscmat.h>
4 #include <petsc-private/matimpl.h>
5 
6 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring*);
7 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring*);
8 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring*);
9 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring*);
10 
11 /*
12    For ghost i that may be negative or greater than the upper bound this
13   maps it into the 0:m-1 range using periodicity
14 */
15 #define SetInRange(i,m) ((i < 0) ? m+i : ((i >= m) ? i-m : i))
16 
17 #undef __FUNCT__
18 #define __FUNCT__ "DMDASetBlockFills_Private"
19 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill,PetscInt w,PetscInt **rfill)
20 {
21   PetscErrorCode ierr;
22   PetscInt       i,j,nz,*fill;
23 
24   PetscFunctionBegin;
25   if (!dfill) PetscFunctionReturn(0);
26 
27   /* count number nonzeros */
28   nz = 0;
29   for (i=0; i<w; i++) {
30     for (j=0; j<w; j++) {
31       if (dfill[w*i+j]) nz++;
32     }
33   }
34   ierr = PetscMalloc((nz + w + 1)*sizeof(PetscInt),&fill);CHKERRQ(ierr);
35   /* construct modified CSR storage of nonzero structure */
36   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
37    so fill[1] - fill[0] gives number of nonzeros in first row etc */
38   nz = w + 1;
39   for (i=0; i<w; i++) {
40     fill[i] = nz;
41     for (j=0; j<w; j++) {
42       if (dfill[w*i+j]) {
43         fill[nz] = j;
44         nz++;
45       }
46     }
47   }
48   fill[w] = nz;
49 
50   *rfill = fill;
51   PetscFunctionReturn(0);
52 }
53 
54 #undef __FUNCT__
55 #define __FUNCT__ "DMDASetBlockFills"
56 /*@
57     DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
58     of the matrix returned by DMCreateMatrix().
59 
60     Logically Collective on DMDA
61 
62     Input Parameter:
63 +   da - the distributed array
64 .   dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
65 -   ofill - the fill pattern in the off-diagonal blocks
66 
67 
68     Level: developer
69 
70     Notes: This only makes sense when you are doing multicomponent problems but using the
71        MPIAIJ matrix format
72 
73            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
74        representing coupling and 0 entries for missing coupling. For example
75 $             dfill[9] = {1, 0, 0,
76 $                         1, 1, 0,
77 $                         0, 1, 1}
78        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
79        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
80        diagonal block).
81 
82      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
83      can be represented in the dfill, ofill format
84 
85    Contributed by Glenn Hammond
86 
87 .seealso DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()
88 
89 @*/
90 PetscErrorCode  DMDASetBlockFills(DM da,const PetscInt *dfill,const PetscInt *ofill)
91 {
92   DM_DA          *dd = (DM_DA*)da->data;
93   PetscErrorCode ierr;
94   PetscInt       i,k,cnt = 1;
95 
96   PetscFunctionBegin;
97   ierr = DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);CHKERRQ(ierr);
98   ierr = DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);CHKERRQ(ierr);
99 
100   /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of
101    columns to the left with any nonzeros in them plus 1 */
102   ierr = PetscMalloc(dd->w*sizeof(PetscBool),&dd->ofillcols);CHKERRQ(ierr);
103   ierr = PetscMemzero(dd->ofillcols,dd->w*sizeof(PetscBool));CHKERRQ(ierr);
104   for (i=0; i<dd->w; i++) {
105     for (k=dd->ofill[i]; k<dd->ofill[i+1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
106   }
107   for (i=0; i<dd->w; i++) {
108     if (dd->ofillcols[i]) {
109       dd->ofillcols[i] = cnt++;
110     }
111   }
112   PetscFunctionReturn(0);
113 }
114 
115 
116 #undef __FUNCT__
117 #define __FUNCT__ "DMCreateColoring_DA"
118 PetscErrorCode  DMCreateColoring_DA(DM da,ISColoringType ctype,MatType mtype,ISColoring *coloring)
119 {
120   PetscErrorCode   ierr;
121   PetscInt         dim,m,n,p,nc;
122   DMDABoundaryType bx,by,bz;
123   MPI_Comm         comm;
124   PetscMPIInt      size;
125   PetscBool        isBAIJ;
126   DM_DA            *dd = (DM_DA*)da->data;
127 
128   PetscFunctionBegin;
129   /*
130                                   m
131           ------------------------------------------------------
132          |                                                     |
133          |                                                     |
134          |               ----------------------                |
135          |               |                    |                |
136       n  |           yn  |                    |                |
137          |               |                    |                |
138          |               .---------------------                |
139          |             (xs,ys)     xn                          |
140          |            .                                        |
141          |         (gxs,gys)                                   |
142          |                                                     |
143           -----------------------------------------------------
144   */
145 
146   /*
147          nc - number of components per grid point
148          col - number of colors needed in one direction for single component problem
149 
150   */
151   ierr = DMDAGetInfo(da,&dim,0,0,0,&m,&n,&p,&nc,0,&bx,&by,&bz,0);CHKERRQ(ierr);
152 
153   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
154   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
155   if (ctype == IS_COLORING_GHOSTED) {
156     if (size == 1) {
157       ctype = IS_COLORING_GLOBAL;
158     } else if (dim > 1) {
159       if ((m==1 && bx == DMDA_BOUNDARY_PERIODIC) || (n==1 && by == DMDA_BOUNDARY_PERIODIC) || (p==1 && bz == DMDA_BOUNDARY_PERIODIC)) {
160         SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"IS_COLORING_GHOSTED cannot be used for periodic boundary condition having both ends of the domain  on the same process");
161       }
162     }
163   }
164 
165   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
166      matrices is for the blocks, not the individual matrix elements  */
167   ierr = PetscStrcmp(mtype,MATBAIJ,&isBAIJ);CHKERRQ(ierr);
168   if (!isBAIJ) {ierr = PetscStrcmp(mtype,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);}
169   if (!isBAIJ) {ierr = PetscStrcmp(mtype,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);}
170   if (isBAIJ) {
171     dd->w  = 1;
172     dd->xs = dd->xs/nc;
173     dd->xe = dd->xe/nc;
174     dd->Xs = dd->Xs/nc;
175     dd->Xe = dd->Xe/nc;
176   }
177 
178   /*
179      We do not provide a getcoloring function in the DMDA operations because
180    the basic DMDA does not know about matrices. We think of DMDA as being more
181    more low-level then matrices.
182   */
183   if (dim == 1) {
184     ierr = DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
185   } else if (dim == 2) {
186     ierr =  DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
187   } else if (dim == 3) {
188     ierr =  DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
189   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
190   if (isBAIJ) {
191     dd->w  = nc;
192     dd->xs = dd->xs*nc;
193     dd->xe = dd->xe*nc;
194     dd->Xs = dd->Xs*nc;
195     dd->Xe = dd->Xe*nc;
196   }
197   PetscFunctionReturn(0);
198 }
199 
200 /* ---------------------------------------------------------------------------------*/
201 
202 #undef __FUNCT__
203 #define __FUNCT__ "DMCreateColoring_DA_2d_MPIAIJ"
204 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
205 {
206   PetscErrorCode   ierr;
207   PetscInt         xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
208   PetscInt         ncolors;
209   MPI_Comm         comm;
210   DMDABoundaryType bx,by;
211   DMDAStencilType  st;
212   ISColoringValue  *colors;
213   DM_DA            *dd = (DM_DA*)da->data;
214 
215   PetscFunctionBegin;
216   /*
217          nc - number of components per grid point
218          col - number of colors needed in one direction for single component problem
219 
220   */
221   ierr = DMDAGetInfo(da,&dim,&m,&n,0,&M,&N,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
222   col  = 2*s + 1;
223   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
224   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
225   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
226 
227   /* special case as taught to us by Paul Hovland */
228   if (st == DMDA_STENCIL_STAR && s == 1) {
229     ierr = DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
230   } else {
231 
232     if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X (%d) is divisible\n\
233                                                             by 2*stencil_width + 1 (%d)\n", m, col);
234     if (by == DMDA_BOUNDARY_PERIODIC && (n % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y (%d) is divisible\n\
235                                                             by 2*stencil_width + 1 (%d)\n", n, col);
236     if (ctype == IS_COLORING_GLOBAL) {
237       if (!dd->localcoloring) {
238         ierr = PetscMalloc(nc*nx*ny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
239         ii   = 0;
240         for (j=ys; j<ys+ny; j++) {
241           for (i=xs; i<xs+nx; i++) {
242             for (k=0; k<nc; k++) {
243               colors[ii++] = k + nc*((i % col) + col*(j % col));
244             }
245           }
246         }
247         ncolors = nc + nc*(col-1 + col*(col-1));
248         ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,&dd->localcoloring);CHKERRQ(ierr);
249       }
250       *coloring = dd->localcoloring;
251     } else if (ctype == IS_COLORING_GHOSTED) {
252       if (!dd->ghostedcoloring) {
253         ierr = PetscMalloc(nc*gnx*gny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
254         ii   = 0;
255         for (j=gys; j<gys+gny; j++) {
256           for (i=gxs; i<gxs+gnx; i++) {
257             for (k=0; k<nc; k++) {
258               /* the complicated stuff is to handle periodic boundaries */
259               colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
260             }
261           }
262         }
263         ncolors = nc + nc*(col - 1 + col*(col-1));
264         ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
265         /* PetscIntView(ncolors,(PetscInt*)colors,0); */
266 
267         ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
268       }
269       *coloring = dd->ghostedcoloring;
270     } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
271   }
272   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
273   PetscFunctionReturn(0);
274 }
275 
276 /* ---------------------------------------------------------------------------------*/
277 
278 #undef __FUNCT__
279 #define __FUNCT__ "DMCreateColoring_DA_3d_MPIAIJ"
280 PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
281 {
282   PetscErrorCode   ierr;
283   PetscInt         xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
284   PetscInt         ncolors;
285   MPI_Comm         comm;
286   DMDABoundaryType bx,by,bz;
287   DMDAStencilType  st;
288   ISColoringValue  *colors;
289   DM_DA            *dd = (DM_DA*)da->data;
290 
291   PetscFunctionBegin;
292   /*
293          nc - number of components per grid point
294          col - number of colors needed in one direction for single component problem
295 
296   */
297   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
298   col  = 2*s + 1;
299   if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
300                                                          by 2*stencil_width + 1\n");
301   if (by == DMDA_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
302                                                          by 2*stencil_width + 1\n");
303   if (bz == DMDA_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
304                                                          by 2*stencil_width + 1\n");
305 
306   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
307   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
308   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
309 
310   /* create the coloring */
311   if (ctype == IS_COLORING_GLOBAL) {
312     if (!dd->localcoloring) {
313       ierr = PetscMalloc(nc*nx*ny*nz*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
314       ii   = 0;
315       for (k=zs; k<zs+nz; k++) {
316         for (j=ys; j<ys+ny; j++) {
317           for (i=xs; i<xs+nx; i++) {
318             for (l=0; l<nc; l++) {
319               colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
320             }
321           }
322         }
323       }
324       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
325       ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,&dd->localcoloring);CHKERRQ(ierr);
326     }
327     *coloring = dd->localcoloring;
328   } else if (ctype == IS_COLORING_GHOSTED) {
329     if (!dd->ghostedcoloring) {
330       ierr = PetscMalloc(nc*gnx*gny*gnz*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
331       ii   = 0;
332       for (k=gzs; k<gzs+gnz; k++) {
333         for (j=gys; j<gys+gny; j++) {
334           for (i=gxs; i<gxs+gnx; i++) {
335             for (l=0; l<nc; l++) {
336               /* the complicated stuff is to handle periodic boundaries */
337               colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
338             }
339           }
340         }
341       }
342       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
343       ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
344       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
345     }
346     *coloring = dd->ghostedcoloring;
347   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
348   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
349   PetscFunctionReturn(0);
350 }
351 
352 /* ---------------------------------------------------------------------------------*/
353 
354 #undef __FUNCT__
355 #define __FUNCT__ "DMCreateColoring_DA_1d_MPIAIJ"
356 PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
357 {
358   PetscErrorCode   ierr;
359   PetscInt         xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
360   PetscInt         ncolors;
361   MPI_Comm         comm;
362   DMDABoundaryType bx;
363   ISColoringValue  *colors;
364   DM_DA            *dd = (DM_DA*)da->data;
365 
366   PetscFunctionBegin;
367   /*
368          nc - number of components per grid point
369          col - number of colors needed in one direction for single component problem
370 
371   */
372   ierr = DMDAGetInfo(da,&dim,&m,0,0,&M,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
373   col  = 2*s + 1;
374 
375   if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points %d is divisible\n\
376                                                           by 2*stencil_width + 1 %d\n",(int)m,(int)col);
377 
378   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
379   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
380   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
381 
382   /* create the coloring */
383   if (ctype == IS_COLORING_GLOBAL) {
384     if (!dd->localcoloring) {
385       ierr = PetscMalloc(nc*nx*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
386       if (dd->ofillcols) {
387         PetscInt tc = 0;
388         for (i=0; i<nc; i++) tc += (PetscInt) (dd->ofillcols[i] > 0);
389         i1 = 0;
390         for (i=xs; i<xs+nx; i++) {
391           for (l=0; l<nc; l++) {
392             if (dd->ofillcols[l] && (i % col)) {
393               colors[i1++] =  nc - 1 + tc*((i % col) - 1) + dd->ofillcols[l];
394             } else {
395               colors[i1++] = l;
396             }
397           }
398         }
399         ncolors = nc + 2*s*tc;
400       } else {
401         i1 = 0;
402         for (i=xs; i<xs+nx; i++) {
403           for (l=0; l<nc; l++) {
404             colors[i1++] = l + nc*(i % col);
405           }
406         }
407         ncolors = nc + nc*(col-1);
408       }
409       ierr = ISColoringCreate(comm,ncolors,nc*nx,colors,&dd->localcoloring);CHKERRQ(ierr);
410     }
411     *coloring = dd->localcoloring;
412   } else if (ctype == IS_COLORING_GHOSTED) {
413     if (!dd->ghostedcoloring) {
414       ierr = PetscMalloc(nc*gnx*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
415       i1   = 0;
416       for (i=gxs; i<gxs+gnx; i++) {
417         for (l=0; l<nc; l++) {
418           /* the complicated stuff is to handle periodic boundaries */
419           colors[i1++] = l + nc*(SetInRange(i,m) % col);
420         }
421       }
422       ncolors = nc + nc*(col-1);
423       ierr    = ISColoringCreate(comm,ncolors,nc*gnx,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
424       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
425     }
426     *coloring = dd->ghostedcoloring;
427   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
428   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
429   PetscFunctionReturn(0);
430 }
431 
432 #undef __FUNCT__
433 #define __FUNCT__ "DMCreateColoring_DA_2d_5pt_MPIAIJ"
434 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
435 {
436   PetscErrorCode   ierr;
437   PetscInt         xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
438   PetscInt         ncolors;
439   MPI_Comm         comm;
440   DMDABoundaryType bx,by;
441   ISColoringValue  *colors;
442   DM_DA            *dd = (DM_DA*)da->data;
443 
444   PetscFunctionBegin;
445   /*
446          nc - number of components per grid point
447          col - number of colors needed in one direction for single component problem
448 
449   */
450   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,0);CHKERRQ(ierr);
451   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
452   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
453   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
454 
455   if (bx == DMDA_BOUNDARY_PERIODIC && (m % 5)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible by 5\n");
456   if (by == DMDA_BOUNDARY_PERIODIC && (n % 5)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible by 5\n");
457 
458   /* create the coloring */
459   if (ctype == IS_COLORING_GLOBAL) {
460     if (!dd->localcoloring) {
461       ierr = PetscMalloc(nc*nx*ny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
462       ii   = 0;
463       for (j=ys; j<ys+ny; j++) {
464         for (i=xs; i<xs+nx; i++) {
465           for (k=0; k<nc; k++) {
466             colors[ii++] = k + nc*((3*j+i) % 5);
467           }
468         }
469       }
470       ncolors = 5*nc;
471       ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,&dd->localcoloring);CHKERRQ(ierr);
472     }
473     *coloring = dd->localcoloring;
474   } else if (ctype == IS_COLORING_GHOSTED) {
475     if (!dd->ghostedcoloring) {
476       ierr = PetscMalloc(nc*gnx*gny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
477       ii = 0;
478       for (j=gys; j<gys+gny; j++) {
479         for (i=gxs; i<gxs+gnx; i++) {
480           for (k=0; k<nc; k++) {
481             colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
482           }
483         }
484       }
485       ncolors = 5*nc;
486       ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
487       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
488     }
489     *coloring = dd->ghostedcoloring;
490   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
491   PetscFunctionReturn(0);
492 }
493 
494 /* =========================================================================== */
495 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat);
496 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM,Mat);
497 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat);
498 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat);
499 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat);
500 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat);
501 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat);
502 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat);
503 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat);
504 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat);
505 
506 #undef __FUNCT__
507 #define __FUNCT__ "MatSetupDM"
508 /*@C
509    MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix
510 
511    Logically Collective on Mat
512 
513    Input Parameters:
514 +  mat - the matrix
515 -  da - the da
516 
517    Level: intermediate
518 
519 @*/
520 PetscErrorCode MatSetupDM(Mat mat,DM da)
521 {
522   PetscErrorCode ierr;
523 
524   PetscFunctionBegin;
525   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
526   PetscValidHeaderSpecific(da,DM_CLASSID,1);
527   ierr = PetscTryMethod(mat,"MatSetupDM_C",(Mat,DM),(mat,da));CHKERRQ(ierr);
528   PetscFunctionReturn(0);
529 }
530 
531 #undef __FUNCT__
532 #define __FUNCT__ "MatView_MPI_DA"
533 PetscErrorCode  MatView_MPI_DA(Mat A,PetscViewer viewer)
534 {
535   DM                da;
536   PetscErrorCode    ierr;
537   const char        *prefix;
538   Mat               Anatural;
539   AO                ao;
540   PetscInt          rstart,rend,*petsc,i;
541   IS                is;
542   MPI_Comm          comm;
543   PetscViewerFormat format;
544 
545   PetscFunctionBegin;
546   /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
547   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
548   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0);
549 
550   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
551   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
552   if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
553 
554   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
555   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
556   ierr = PetscMalloc((rend-rstart)*sizeof(PetscInt),&petsc);CHKERRQ(ierr);
557   for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
558   ierr = AOApplicationToPetsc(ao,rend-rstart,petsc);CHKERRQ(ierr);
559   ierr = ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
560 
561   /* call viewer on natural ordering */
562   ierr = MatGetSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);CHKERRQ(ierr);
563   ierr = ISDestroy(&is);CHKERRQ(ierr);
564   ierr = PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);CHKERRQ(ierr);
565   ierr = PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);CHKERRQ(ierr);
566   ierr = PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);CHKERRQ(ierr);
567   ierr = MatView(Anatural,viewer);CHKERRQ(ierr);
568   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
569   PetscFunctionReturn(0);
570 }
571 
572 #undef __FUNCT__
573 #define __FUNCT__ "MatLoad_MPI_DA"
574 PetscErrorCode  MatLoad_MPI_DA(Mat A,PetscViewer viewer)
575 {
576   DM             da;
577   PetscErrorCode ierr;
578   Mat            Anatural,Aapp;
579   AO             ao;
580   PetscInt       rstart,rend,*app,i;
581   IS             is;
582   MPI_Comm       comm;
583 
584   PetscFunctionBegin;
585   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
586   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
587   if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
588 
589   /* Load the matrix in natural ordering */
590   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Anatural);CHKERRQ(ierr);
591   ierr = MatSetType(Anatural,((PetscObject)A)->type_name);CHKERRQ(ierr);
592   ierr = MatSetSizes(Anatural,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
593   ierr = MatLoad(Anatural,viewer);CHKERRQ(ierr);
594 
595   /* Map natural ordering to application ordering and create IS */
596   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
597   ierr = MatGetOwnershipRange(Anatural,&rstart,&rend);CHKERRQ(ierr);
598   ierr = PetscMalloc((rend-rstart)*sizeof(PetscInt),&app);CHKERRQ(ierr);
599   for (i=rstart; i<rend; i++) app[i-rstart] = i;
600   ierr = AOPetscToApplication(ao,rend-rstart,app);CHKERRQ(ierr);
601   ierr = ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
602 
603   /* Do permutation and replace header */
604   ierr = MatGetSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);CHKERRQ(ierr);
605   ierr = MatHeaderReplace(A,Aapp);CHKERRQ(ierr);
606   ierr = ISDestroy(&is);CHKERRQ(ierr);
607   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
608   PetscFunctionReturn(0);
609 }
610 
611 #undef __FUNCT__
612 #define __FUNCT__ "DMCreateMatrix_DA"
613 PetscErrorCode DMCreateMatrix_DA(DM da, MatType mtype,Mat *J)
614 {
615   PetscErrorCode ierr;
616   PetscInt       dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
617   Mat            A;
618   MPI_Comm       comm;
619   MatType        Atype;
620   PetscSection   section, sectionGlobal;
621   void           (*aij)(void)=NULL,(*baij)(void)=NULL,(*sbaij)(void)=NULL;
622   MatType        ttype[256];
623   PetscBool      flg;
624   PetscMPIInt    size;
625   DM_DA          *dd = (DM_DA*)da->data;
626 
627   PetscFunctionBegin;
628 #if !defined(PETSC_USE_DYNAMIC_LIBRARIES)
629   ierr = MatInitializePackage();CHKERRQ(ierr);
630 #endif
631   if (!mtype) mtype = MATAIJ;
632   ierr = PetscStrcpy((char*)ttype,mtype);CHKERRQ(ierr);
633   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)da),((PetscObject)da)->prefix,"DMDA options","Mat");CHKERRQ(ierr);
634   ierr = PetscOptionsList("-dm_mat_type","Matrix type","MatSetType",MatList,mtype,(char*)ttype,256,&flg);CHKERRQ(ierr);
635   ierr = PetscOptionsEnd();
636 
637   ierr = DMGetDefaultSection(da, &section);CHKERRQ(ierr);
638   if (section) {
639     PetscInt  bs = -1;
640     PetscInt  localSize;
641     PetscBool isShell, isBlock, isSeqBlock, isMPIBlock, isSymBlock, isSymSeqBlock, isSymMPIBlock, isSymmetric;
642 
643     ierr = DMGetDefaultGlobalSection(da, &sectionGlobal);CHKERRQ(ierr);
644     ierr = PetscSectionGetConstrainedStorageSize(sectionGlobal, &localSize);CHKERRQ(ierr);
645     ierr = MatCreate(PetscObjectComm((PetscObject)da), J);CHKERRQ(ierr);
646     ierr = MatSetSizes(*J, localSize, localSize, PETSC_DETERMINE, PETSC_DETERMINE);CHKERRQ(ierr);
647     ierr = MatSetType(*J, mtype);CHKERRQ(ierr);
648     ierr = MatSetFromOptions(*J);CHKERRQ(ierr);
649     ierr = PetscStrcmp(mtype, MATSHELL, &isShell);CHKERRQ(ierr);
650     ierr = PetscStrcmp(mtype, MATBAIJ, &isBlock);CHKERRQ(ierr);
651     ierr = PetscStrcmp(mtype, MATSEQBAIJ, &isSeqBlock);CHKERRQ(ierr);
652     ierr = PetscStrcmp(mtype, MATMPIBAIJ, &isMPIBlock);CHKERRQ(ierr);
653     ierr = PetscStrcmp(mtype, MATSBAIJ, &isSymBlock);CHKERRQ(ierr);
654     ierr = PetscStrcmp(mtype, MATSEQSBAIJ, &isSymSeqBlock);CHKERRQ(ierr);
655     ierr = PetscStrcmp(mtype, MATMPISBAIJ, &isSymMPIBlock);CHKERRQ(ierr);
656     /* Check for symmetric storage */
657     isSymmetric = (PetscBool) (isSymBlock || isSymSeqBlock || isSymMPIBlock);
658     if (isSymmetric) {
659       ierr = MatSetOption(*J, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE);CHKERRQ(ierr);
660     }
661     if (!isShell) {
662       PetscInt *dnz, *onz, *dnzu, *onzu, bsLocal;
663 
664       if (bs < 0) {
665         if (isBlock || isSeqBlock || isMPIBlock || isSymBlock || isSymSeqBlock || isSymMPIBlock) {
666           PetscInt pStart, pEnd, p, dof;
667 
668           ierr = PetscSectionGetChart(sectionGlobal, &pStart, &pEnd);CHKERRQ(ierr);
669           for (p = pStart; p < pEnd; ++p) {
670             ierr = PetscSectionGetDof(sectionGlobal, p, &dof);CHKERRQ(ierr);
671             if (dof) {
672               bs = dof;
673               break;
674             }
675           }
676         } else {
677           bs = 1;
678         }
679         /* Must have same blocksize on all procs (some might have no points) */
680         bsLocal = bs;
681         ierr    = MPI_Allreduce(&bsLocal, &bs, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)da));CHKERRQ(ierr);
682       }
683       ierr = PetscMalloc4(localSize/bs, PetscInt, &dnz, localSize/bs, PetscInt, &onz, localSize/bs, PetscInt, &dnzu, localSize/bs, PetscInt, &onzu);CHKERRQ(ierr);
684       ierr = PetscMemzero(dnz,  localSize/bs * sizeof(PetscInt));CHKERRQ(ierr);
685       ierr = PetscMemzero(onz,  localSize/bs * sizeof(PetscInt));CHKERRQ(ierr);
686       ierr = PetscMemzero(dnzu, localSize/bs * sizeof(PetscInt));CHKERRQ(ierr);
687       ierr = PetscMemzero(onzu, localSize/bs * sizeof(PetscInt));CHKERRQ(ierr);
688       /* ierr = DMPlexPreallocateOperator(dm, bs, section, sectionGlobal, dnz, onz, dnzu, onzu, *J, fillMatrix);CHKERRQ(ierr); */
689       ierr = PetscFree4(dnz, onz, dnzu, onzu);CHKERRQ(ierr);
690     }
691   }
692   /*
693                                   m
694           ------------------------------------------------------
695          |                                                     |
696          |                                                     |
697          |               ----------------------                |
698          |               |                    |                |
699       n  |           ny  |                    |                |
700          |               |                    |                |
701          |               .---------------------                |
702          |             (xs,ys)     nx                          |
703          |            .                                        |
704          |         (gxs,gys)                                   |
705          |                                                     |
706           -----------------------------------------------------
707   */
708 
709   /*
710          nc - number of components per grid point
711          col - number of colors needed in one direction for single component problem
712 
713   */
714   M   = dd->M;
715   N   = dd->N;
716   P   = dd->P;
717   dim = dd->dim;
718   dof = dd->w;
719   /* ierr = DMDAGetInfo(da,&dim,&M,&N,&P,0,0,0,&dof,0,0,0,0,0);CHKERRQ(ierr); */
720   ierr = DMDAGetCorners(da,0,0,0,&nx,&ny,&nz);CHKERRQ(ierr);
721   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
722   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
723   ierr = MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);CHKERRQ(ierr);
724   ierr = MatSetType(A,(MatType)ttype);CHKERRQ(ierr);
725   ierr = MatSetDM(A,da);CHKERRQ(ierr);
726   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
727   ierr = MatGetType(A,&Atype);CHKERRQ(ierr);
728   /*
729      We do not provide a getmatrix function in the DMDA operations because
730    the basic DMDA does not know about matrices. We think of DMDA as being more
731    more low-level than matrices. This is kind of cheating but, cause sometimes
732    we think of DMDA has higher level than matrices.
733 
734      We could switch based on Atype (or mtype), but we do not since the
735    specialized setting routines depend only the particular preallocation
736    details of the matrix, not the type itself.
737   */
738   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
739   if (!aij) {
740     ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
741   }
742   if (!aij) {
743     ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
744     if (!baij) {
745       ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
746     }
747     if (!baij) {
748       ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
749       if (!sbaij) {
750         ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
751       }
752     }
753   }
754   if (aij) {
755     if (dim == 1) {
756       if (dd->ofill) {
757         ierr = DMCreateMatrix_DA_1d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
758       } else {
759         ierr = DMCreateMatrix_DA_1d_MPIAIJ(da,A);CHKERRQ(ierr);
760       }
761     } else if (dim == 2) {
762       if (dd->ofill) {
763         ierr = DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
764       } else {
765         ierr = DMCreateMatrix_DA_2d_MPIAIJ(da,A);CHKERRQ(ierr);
766       }
767     } else if (dim == 3) {
768       if (dd->ofill) {
769         ierr = DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
770       } else {
771         ierr = DMCreateMatrix_DA_3d_MPIAIJ(da,A);CHKERRQ(ierr);
772       }
773     }
774   } else if (baij) {
775     if (dim == 2) {
776       ierr = DMCreateMatrix_DA_2d_MPIBAIJ(da,A);CHKERRQ(ierr);
777     } else if (dim == 3) {
778       ierr = DMCreateMatrix_DA_3d_MPIBAIJ(da,A);CHKERRQ(ierr);
779     } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
780   } else if (sbaij) {
781     if (dim == 2) {
782       ierr = DMCreateMatrix_DA_2d_MPISBAIJ(da,A);CHKERRQ(ierr);
783     } else if (dim == 3) {
784       ierr = DMCreateMatrix_DA_3d_MPISBAIJ(da,A);CHKERRQ(ierr);
785     } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
786   } else {
787     ISLocalToGlobalMapping ltog,ltogb;
788     ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
789     ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
790     ierr = MatSetUp(A);CHKERRQ(ierr);
791     ierr = MatSetLocalToGlobalMapping(A,ltog,ltog);CHKERRQ(ierr);
792     ierr = MatSetLocalToGlobalMappingBlock(A,ltogb,ltogb);CHKERRQ(ierr);
793   }
794   ierr = DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);CHKERRQ(ierr);
795   ierr = MatSetStencil(A,dim,dims,starts,dof);CHKERRQ(ierr);
796   ierr = MatSetDM(A,da);CHKERRQ(ierr);
797   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
798   if (size > 1) {
799     /* change viewer to display matrix in natural ordering */
800     ierr = MatShellSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA);CHKERRQ(ierr);
801     ierr = MatShellSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA);CHKERRQ(ierr);
802   }
803   *J = A;
804   PetscFunctionReturn(0);
805 }
806 
807 /* ---------------------------------------------------------------------------------*/
808 #undef __FUNCT__
809 #define __FUNCT__ "DMCreateMatrix_DA_2d_MPIAIJ"
810 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J)
811 {
812   PetscErrorCode         ierr;
813   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p;
814   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
815   MPI_Comm               comm;
816   PetscScalar            *values;
817   DMDABoundaryType       bx,by;
818   ISLocalToGlobalMapping ltog,ltogb;
819   DMDAStencilType        st;
820 
821   PetscFunctionBegin;
822   /*
823          nc - number of components per grid point
824          col - number of colors needed in one direction for single component problem
825 
826   */
827   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
828   col  = 2*s + 1;
829   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
830   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
831   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
832 
833   ierr = PetscMalloc2(nc,PetscInt,&rows,col*col*nc*nc,PetscInt,&cols);CHKERRQ(ierr);
834   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
835   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
836 
837   /* determine the matrix preallocation information */
838   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
839   for (i=xs; i<xs+nx; i++) {
840 
841     pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
842     pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
843 
844     for (j=ys; j<ys+ny; j++) {
845       slot = i - gxs + gnx*(j - gys);
846 
847       lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
848       lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
849 
850       cnt = 0;
851       for (k=0; k<nc; k++) {
852         for (l=lstart; l<lend+1; l++) {
853           for (p=pstart; p<pend+1; p++) {
854             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
855               cols[cnt++] = k + nc*(slot + gnx*l + p);
856             }
857           }
858         }
859         rows[k] = k + nc*(slot);
860       }
861       ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
862     }
863   }
864   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
865   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
866   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
867   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
868 
869   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
870   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
871 
872   /*
873     For each node in the grid: we get the neighbors in the local (on processor ordering
874     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
875     PETSc ordering.
876   */
877   if (!da->prealloc_only) {
878     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
879     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
880     for (i=xs; i<xs+nx; i++) {
881 
882       pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
883       pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
884 
885       for (j=ys; j<ys+ny; j++) {
886         slot = i - gxs + gnx*(j - gys);
887 
888         lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
889         lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
890 
891         cnt = 0;
892         for (k=0; k<nc; k++) {
893           for (l=lstart; l<lend+1; l++) {
894             for (p=pstart; p<pend+1; p++) {
895               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
896                 cols[cnt++] = k + nc*(slot + gnx*l + p);
897               }
898             }
899           }
900           rows[k] = k + nc*(slot);
901         }
902         ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
903       }
904     }
905     ierr = PetscFree(values);CHKERRQ(ierr);
906     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
907     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
908   }
909   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
910   PetscFunctionReturn(0);
911 }
912 
913 #undef __FUNCT__
914 #define __FUNCT__ "DMCreateMatrix_DA_2d_MPIAIJ_Fill"
915 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)
916 {
917   PetscErrorCode         ierr;
918   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
919   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,l,p;
920   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
921   DM_DA                  *dd = (DM_DA*)da->data;
922   PetscInt               ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
923   MPI_Comm               comm;
924   PetscScalar            *values;
925   DMDABoundaryType       bx,by;
926   ISLocalToGlobalMapping ltog,ltogb;
927   DMDAStencilType        st;
928 
929   PetscFunctionBegin;
930   /*
931          nc - number of components per grid point
932          col - number of colors needed in one direction for single component problem
933 
934   */
935   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
936   col  = 2*s + 1;
937   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
938   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
939   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
940 
941   ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
942   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
943   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
944 
945   /* determine the matrix preallocation information */
946   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
947   for (i=xs; i<xs+nx; i++) {
948 
949     pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
950     pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
951 
952     for (j=ys; j<ys+ny; j++) {
953       slot = i - gxs + gnx*(j - gys);
954 
955       lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
956       lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
957 
958       for (k=0; k<nc; k++) {
959         cnt = 0;
960         for (l=lstart; l<lend+1; l++) {
961           for (p=pstart; p<pend+1; p++) {
962             if (l || p) {
963               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
964                 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
965               }
966             } else {
967               if (dfill) {
968                 for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
969               } else {
970                 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
971               }
972             }
973           }
974         }
975         row  = k + nc*(slot);
976         ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
977       }
978     }
979   }
980   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
981   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
982   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
983   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
984   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
985 
986   /*
987     For each node in the grid: we get the neighbors in the local (on processor ordering
988     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
989     PETSc ordering.
990   */
991   if (!da->prealloc_only) {
992     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
993     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
994     for (i=xs; i<xs+nx; i++) {
995 
996       pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
997       pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
998 
999       for (j=ys; j<ys+ny; j++) {
1000         slot = i - gxs + gnx*(j - gys);
1001 
1002         lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1003         lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1004 
1005         for (k=0; k<nc; k++) {
1006           cnt = 0;
1007           for (l=lstart; l<lend+1; l++) {
1008             for (p=pstart; p<pend+1; p++) {
1009               if (l || p) {
1010                 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
1011                   for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
1012                 }
1013               } else {
1014                 if (dfill) {
1015                   for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1016                 } else {
1017                   for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1018                 }
1019               }
1020             }
1021           }
1022           row  = k + nc*(slot);
1023           ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1024         }
1025       }
1026     }
1027     ierr = PetscFree(values);CHKERRQ(ierr);
1028     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1029     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1030   }
1031   ierr = PetscFree(cols);CHKERRQ(ierr);
1032   PetscFunctionReturn(0);
1033 }
1034 
1035 /* ---------------------------------------------------------------------------------*/
1036 
1037 #undef __FUNCT__
1038 #define __FUNCT__ "DMCreateMatrix_DA_3d_MPIAIJ"
1039 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J)
1040 {
1041   PetscErrorCode         ierr;
1042   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1043   PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
1044   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1045   MPI_Comm               comm;
1046   PetscScalar            *values;
1047   DMDABoundaryType       bx,by,bz;
1048   ISLocalToGlobalMapping ltog,ltogb;
1049   DMDAStencilType        st;
1050 
1051   PetscFunctionBegin;
1052   /*
1053          nc - number of components per grid point
1054          col - number of colors needed in one direction for single component problem
1055 
1056   */
1057   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1058   col  = 2*s + 1;
1059 
1060   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1061   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1062   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1063 
1064   ierr = PetscMalloc2(nc,PetscInt,&rows,col*col*col*nc*nc,PetscInt,&cols);CHKERRQ(ierr);
1065   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1066   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1067 
1068   /* determine the matrix preallocation information */
1069   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1070   for (i=xs; i<xs+nx; i++) {
1071     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1072     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1073     for (j=ys; j<ys+ny; j++) {
1074       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1075       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1076       for (k=zs; k<zs+nz; k++) {
1077         kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1078         kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1079 
1080         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1081 
1082         cnt = 0;
1083         for (l=0; l<nc; l++) {
1084           for (ii=istart; ii<iend+1; ii++) {
1085             for (jj=jstart; jj<jend+1; jj++) {
1086               for (kk=kstart; kk<kend+1; kk++) {
1087                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1088                   cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1089                 }
1090               }
1091             }
1092           }
1093           rows[l] = l + nc*(slot);
1094         }
1095         ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1096       }
1097     }
1098   }
1099   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1100   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1101   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1102   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1103   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1104   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1105 
1106   /*
1107     For each node in the grid: we get the neighbors in the local (on processor ordering
1108     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1109     PETSc ordering.
1110   */
1111   if (!da->prealloc_only) {
1112     ierr = PetscMalloc(col*col*col*nc*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1113     ierr = PetscMemzero(values,col*col*col*nc*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1114     for (i=xs; i<xs+nx; i++) {
1115       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1116       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1117       for (j=ys; j<ys+ny; j++) {
1118         jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1119         jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1120         for (k=zs; k<zs+nz; k++) {
1121           kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1122           kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1123 
1124           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1125 
1126           cnt = 0;
1127           for (l=0; l<nc; l++) {
1128             for (ii=istart; ii<iend+1; ii++) {
1129               for (jj=jstart; jj<jend+1; jj++) {
1130                 for (kk=kstart; kk<kend+1; kk++) {
1131                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1132                     cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1133                   }
1134                 }
1135               }
1136             }
1137             rows[l] = l + nc*(slot);
1138           }
1139           ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1140         }
1141       }
1142     }
1143     ierr = PetscFree(values);CHKERRQ(ierr);
1144     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1145     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1146   }
1147   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1148   PetscFunctionReturn(0);
1149 }
1150 
1151 /* ---------------------------------------------------------------------------------*/
1152 
1153 #undef __FUNCT__
1154 #define __FUNCT__ "DMCreateMatrix_DA_1d_MPIAIJ_Fill"
1155 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J)
1156 {
1157   PetscErrorCode         ierr;
1158   DM_DA                  *dd = (DM_DA*)da->data;
1159   PetscInt               xs,nx,i,j,gxs,gnx,row,k,l;
1160   PetscInt               m,dim,s,*cols = NULL,nc,col,cnt,*ocols;
1161   PetscInt               *ofill = dd->ofill,*dfill = dd->dfill;
1162   PetscScalar            *values;
1163   DMDABoundaryType       bx;
1164   ISLocalToGlobalMapping ltog,ltogb;
1165   PetscMPIInt            rank,size;
1166 
1167   PetscFunctionBegin;
1168   if (dd->bx == DMDA_BOUNDARY_PERIODIC) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"With fill provided not implemented with periodic boundary conditions");
1169   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)da),&rank);CHKERRQ(ierr);
1170   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRQ(ierr);
1171 
1172   /*
1173          nc - number of components per grid point
1174          col - number of colors needed in one direction for single component problem
1175 
1176   */
1177   ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
1178   col  = 2*s + 1;
1179 
1180   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
1181   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
1182 
1183   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1184   ierr = PetscMalloc2(nx*nc,PetscInt,&cols,nx*nc,PetscInt,&ocols);CHKERRQ(ierr);
1185   ierr = PetscMemzero(cols,nx*nc*sizeof(PetscInt));CHKERRQ(ierr);
1186   ierr = PetscMemzero(ocols,nx*nc*sizeof(PetscInt));CHKERRQ(ierr);
1187 
1188   /*
1189         note should be smaller for first and last process with no periodic
1190         does not handle dfill
1191   */
1192   cnt = 0;
1193   /* coupling with process to the left */
1194   for (i=0; i<s; i++) {
1195     for (j=0; j<nc; j++) {
1196       ocols[cnt] = ((!rank) ? 0 : (s - i)*(ofill[j+1] - ofill[j]));
1197       cols[cnt]  = dfill[j+1] - dfill[j] + (s + i)*(ofill[j+1] - ofill[j]);
1198       cnt++;
1199     }
1200   }
1201   for (i=s; i<nx-s; i++) {
1202     for (j=0; j<nc; j++) {
1203       cols[cnt] = dfill[j+1] - dfill[j] + 2*s*(ofill[j+1] - ofill[j]);
1204       cnt++;
1205     }
1206   }
1207   /* coupling with process to the right */
1208   for (i=nx-s; i<nx; i++) {
1209     for (j=0; j<nc; j++) {
1210       ocols[cnt] = ((rank == (size-1)) ? 0 : (i - nx + s + 1)*(ofill[j+1] - ofill[j]));
1211       cols[cnt]  = dfill[j+1] - dfill[j] + (s + nx - i - 1)*(ofill[j+1] - ofill[j]);
1212       cnt++;
1213     }
1214   }
1215 
1216   ierr = MatSeqAIJSetPreallocation(J,0,cols);CHKERRQ(ierr);
1217   ierr = MatMPIAIJSetPreallocation(J,0,cols,0,ocols);CHKERRQ(ierr);
1218   ierr = PetscFree2(cols,ocols);CHKERRQ(ierr);
1219 
1220   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1221   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1222   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1223   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1224 
1225   /*
1226     For each node in the grid: we get the neighbors in the local (on processor ordering
1227     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1228     PETSc ordering.
1229   */
1230   if (!da->prealloc_only) {
1231     ierr = PetscMalloc(col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1232     ierr = PetscMalloc(col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1233     ierr = PetscMemzero(values,col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1234 
1235     row = xs*nc;
1236     /* coupling with process to the left */
1237     for (i=xs; i<xs+s; i++) {
1238       for (j=0; j<nc; j++) {
1239         cnt = 0;
1240         if (rank) {
1241           for (l=0; l<s; l++) {
1242             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1243           }
1244         }
1245         if (dfill) {
1246           for (k=dfill[j]; k<dfill[j+1]; k++) {
1247             cols[cnt++] = i*nc + dfill[k];
1248           }
1249         } else {
1250           for (k=0; k<nc; k++) {
1251             cols[cnt++] = i*nc + k;
1252           }
1253         }
1254         for (l=0; l<s; l++) {
1255           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1256         }
1257         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1258         row++;
1259       }
1260     }
1261     for (i=xs+s; i<xs+nx-s; i++) {
1262       for (j=0; j<nc; j++) {
1263         cnt = 0;
1264         for (l=0; l<s; l++) {
1265           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1266         }
1267         if (dfill) {
1268           for (k=dfill[j]; k<dfill[j+1]; k++) {
1269             cols[cnt++] = i*nc + dfill[k];
1270           }
1271         } else {
1272           for (k=0; k<nc; k++) {
1273             cols[cnt++] = i*nc + k;
1274           }
1275         }
1276         for (l=0; l<s; l++) {
1277           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1278         }
1279         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1280         row++;
1281       }
1282     }
1283     /* coupling with process to the right */
1284     for (i=xs+nx-s; i<xs+nx; i++) {
1285       for (j=0; j<nc; j++) {
1286         cnt = 0;
1287         for (l=0; l<s; l++) {
1288           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1289         }
1290         if (dfill) {
1291           for (k=dfill[j]; k<dfill[j+1]; k++) {
1292             cols[cnt++] = i*nc + dfill[k];
1293           }
1294         } else {
1295           for (k=0; k<nc; k++) {
1296             cols[cnt++] = i*nc + k;
1297           }
1298         }
1299         if (rank < size-1) {
1300           for (l=0; l<s; l++) {
1301             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1302           }
1303         }
1304         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1305         row++;
1306       }
1307     }
1308     ierr = PetscFree(values);CHKERRQ(ierr);
1309     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1310     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1311     ierr = PetscFree(cols);CHKERRQ(ierr);
1312   }
1313   PetscFunctionReturn(0);
1314 }
1315 
1316 /* ---------------------------------------------------------------------------------*/
1317 
1318 #undef __FUNCT__
1319 #define __FUNCT__ "DMCreateMatrix_DA_1d_MPIAIJ"
1320 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J)
1321 {
1322   PetscErrorCode         ierr;
1323   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1324   PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
1325   PetscInt               istart,iend;
1326   PetscScalar            *values;
1327   DMDABoundaryType       bx;
1328   ISLocalToGlobalMapping ltog,ltogb;
1329 
1330   PetscFunctionBegin;
1331   /*
1332          nc - number of components per grid point
1333          col - number of colors needed in one direction for single component problem
1334 
1335   */
1336   ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
1337   col  = 2*s + 1;
1338 
1339   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
1340   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
1341 
1342   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1343   ierr = MatSeqAIJSetPreallocation(J,col*nc,0);CHKERRQ(ierr);
1344   ierr = MatMPIAIJSetPreallocation(J,col*nc,0,col*nc,0);CHKERRQ(ierr);
1345 
1346   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1347   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1348   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1349   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1350 
1351   /*
1352     For each node in the grid: we get the neighbors in the local (on processor ordering
1353     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1354     PETSc ordering.
1355   */
1356   if (!da->prealloc_only) {
1357     ierr = PetscMalloc2(nc,PetscInt,&rows,col*nc*nc,PetscInt,&cols);CHKERRQ(ierr);
1358     ierr = PetscMalloc(col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1359     ierr = PetscMemzero(values,col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1360     for (i=xs; i<xs+nx; i++) {
1361       istart = PetscMax(-s,gxs - i);
1362       iend   = PetscMin(s,gxs + gnx - i - 1);
1363       slot   = i - gxs;
1364 
1365       cnt = 0;
1366       for (l=0; l<nc; l++) {
1367         for (i1=istart; i1<iend+1; i1++) {
1368           cols[cnt++] = l + nc*(slot + i1);
1369         }
1370         rows[l] = l + nc*(slot);
1371       }
1372       ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1373     }
1374     ierr = PetscFree(values);CHKERRQ(ierr);
1375     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1376     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1377     ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1378   }
1379   PetscFunctionReturn(0);
1380 }
1381 
1382 #undef __FUNCT__
1383 #define __FUNCT__ "DMCreateMatrix_DA_2d_MPIBAIJ"
1384 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
1385 {
1386   PetscErrorCode         ierr;
1387   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1388   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1389   PetscInt               istart,iend,jstart,jend,ii,jj;
1390   MPI_Comm               comm;
1391   PetscScalar            *values;
1392   DMDABoundaryType       bx,by;
1393   DMDAStencilType        st;
1394   ISLocalToGlobalMapping ltog,ltogb;
1395 
1396   PetscFunctionBegin;
1397   /*
1398      nc - number of components per grid point
1399      col - number of colors needed in one direction for single component problem
1400   */
1401   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1402   col  = 2*s + 1;
1403 
1404   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1405   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1406   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1407 
1408   ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1409 
1410   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1411   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1412 
1413   /* determine the matrix preallocation information */
1414   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1415   for (i=xs; i<xs+nx; i++) {
1416     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1417     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1418     for (j=ys; j<ys+ny; j++) {
1419       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1420       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1421       slot   = i - gxs + gnx*(j - gys);
1422 
1423       /* Find block columns in block row */
1424       cnt = 0;
1425       for (ii=istart; ii<iend+1; ii++) {
1426         for (jj=jstart; jj<jend+1; jj++) {
1427           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1428             cols[cnt++] = slot + ii + gnx*jj;
1429           }
1430         }
1431       }
1432       ierr = MatPreallocateSetLocal(ltogb,1,&slot,ltogb,cnt,cols,dnz,onz);CHKERRQ(ierr);
1433     }
1434   }
1435   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1436   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1437   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1438 
1439   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1440   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1441 
1442   /*
1443     For each node in the grid: we get the neighbors in the local (on processor ordering
1444     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1445     PETSc ordering.
1446   */
1447   if (!da->prealloc_only) {
1448     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1449     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1450     for (i=xs; i<xs+nx; i++) {
1451       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1452       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1453       for (j=ys; j<ys+ny; j++) {
1454         jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1455         jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1456         slot = i - gxs + gnx*(j - gys);
1457         cnt  = 0;
1458         for (ii=istart; ii<iend+1; ii++) {
1459           for (jj=jstart; jj<jend+1; jj++) {
1460             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1461               cols[cnt++] = slot + ii + gnx*jj;
1462             }
1463           }
1464         }
1465         ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1466       }
1467     }
1468     ierr = PetscFree(values);CHKERRQ(ierr);
1469     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1470     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1471   }
1472   ierr = PetscFree(cols);CHKERRQ(ierr);
1473   PetscFunctionReturn(0);
1474 }
1475 
1476 #undef __FUNCT__
1477 #define __FUNCT__ "DMCreateMatrix_DA_3d_MPIBAIJ"
1478 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
1479 {
1480   PetscErrorCode         ierr;
1481   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1482   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1483   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1484   MPI_Comm               comm;
1485   PetscScalar            *values;
1486   DMDABoundaryType       bx,by,bz;
1487   DMDAStencilType        st;
1488   ISLocalToGlobalMapping ltog,ltogb;
1489 
1490   PetscFunctionBegin;
1491   /*
1492          nc - number of components per grid point
1493          col - number of colors needed in one direction for single component problem
1494 
1495   */
1496   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1497   col  = 2*s + 1;
1498 
1499   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1500   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1501   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1502 
1503   ierr = PetscMalloc(col*col*col*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1504 
1505   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1506   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1507 
1508   /* determine the matrix preallocation information */
1509   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1510   for (i=xs; i<xs+nx; i++) {
1511     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1512     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1513     for (j=ys; j<ys+ny; j++) {
1514       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1515       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1516       for (k=zs; k<zs+nz; k++) {
1517         kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1518         kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1519 
1520         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1521 
1522         /* Find block columns in block row */
1523         cnt = 0;
1524         for (ii=istart; ii<iend+1; ii++) {
1525           for (jj=jstart; jj<jend+1; jj++) {
1526             for (kk=kstart; kk<kend+1; kk++) {
1527               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1528                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1529               }
1530             }
1531           }
1532         }
1533         ierr = MatPreallocateSetLocal(ltogb,1,&slot,ltogb,cnt,cols,dnz,onz);CHKERRQ(ierr);
1534       }
1535     }
1536   }
1537   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1538   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1539   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1540 
1541   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1542   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1543 
1544   /*
1545     For each node in the grid: we get the neighbors in the local (on processor ordering
1546     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1547     PETSc ordering.
1548   */
1549   if (!da->prealloc_only) {
1550     ierr = PetscMalloc(col*col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1551     ierr = PetscMemzero(values,col*col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1552     for (i=xs; i<xs+nx; i++) {
1553       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1554       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1555       for (j=ys; j<ys+ny; j++) {
1556         jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1557         jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1558         for (k=zs; k<zs+nz; k++) {
1559           kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1560           kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1561 
1562           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1563 
1564           cnt = 0;
1565           for (ii=istart; ii<iend+1; ii++) {
1566             for (jj=jstart; jj<jend+1; jj++) {
1567               for (kk=kstart; kk<kend+1; kk++) {
1568                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1569                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1570                 }
1571               }
1572             }
1573           }
1574           ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1575         }
1576       }
1577     }
1578     ierr = PetscFree(values);CHKERRQ(ierr);
1579     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1580     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1581   }
1582   ierr = PetscFree(cols);CHKERRQ(ierr);
1583   PetscFunctionReturn(0);
1584 }
1585 
1586 #undef __FUNCT__
1587 #define __FUNCT__ "L2GFilterUpperTriangular"
1588 /*
1589   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
1590   identify in the local ordering with periodic domain.
1591 */
1592 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[])
1593 {
1594   PetscErrorCode ierr;
1595   PetscInt       i,n;
1596 
1597   PetscFunctionBegin;
1598   ierr = ISLocalToGlobalMappingApply(ltog,1,row,row);CHKERRQ(ierr);
1599   ierr = ISLocalToGlobalMappingApply(ltog,*cnt,col,col);CHKERRQ(ierr);
1600   for (i=0,n=0; i<*cnt; i++) {
1601     if (col[i] >= *row) col[n++] = col[i];
1602   }
1603   *cnt = n;
1604   PetscFunctionReturn(0);
1605 }
1606 
1607 #undef __FUNCT__
1608 #define __FUNCT__ "DMCreateMatrix_DA_2d_MPISBAIJ"
1609 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J)
1610 {
1611   PetscErrorCode         ierr;
1612   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1613   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1614   PetscInt               istart,iend,jstart,jend,ii,jj;
1615   MPI_Comm               comm;
1616   PetscScalar            *values;
1617   DMDABoundaryType       bx,by;
1618   DMDAStencilType        st;
1619   ISLocalToGlobalMapping ltog,ltogb;
1620 
1621   PetscFunctionBegin;
1622   /*
1623      nc - number of components per grid point
1624      col - number of colors needed in one direction for single component problem
1625   */
1626   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1627   col  = 2*s + 1;
1628 
1629   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1630   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1631   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1632 
1633   ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1634 
1635   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1636   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1637 
1638   /* determine the matrix preallocation information */
1639   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1640   for (i=xs; i<xs+nx; i++) {
1641     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1642     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1643     for (j=ys; j<ys+ny; j++) {
1644       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1645       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1646       slot   = i - gxs + gnx*(j - gys);
1647 
1648       /* Find block columns in block row */
1649       cnt = 0;
1650       for (ii=istart; ii<iend+1; ii++) {
1651         for (jj=jstart; jj<jend+1; jj++) {
1652           if (st == DMDA_STENCIL_BOX || !ii || !jj) {
1653             cols[cnt++] = slot + ii + gnx*jj;
1654           }
1655         }
1656       }
1657       ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1658       ierr = MatPreallocateSymmetricSet(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
1659     }
1660   }
1661   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1662   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1663   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1664 
1665   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1666   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1667 
1668   /*
1669     For each node in the grid: we get the neighbors in the local (on processor ordering
1670     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1671     PETSc ordering.
1672   */
1673   if (!da->prealloc_only) {
1674     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1675     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1676     for (i=xs; i<xs+nx; i++) {
1677       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1678       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1679       for (j=ys; j<ys+ny; j++) {
1680         jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1681         jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1682         slot   = i - gxs + gnx*(j - gys);
1683 
1684         /* Find block columns in block row */
1685         cnt = 0;
1686         for (ii=istart; ii<iend+1; ii++) {
1687           for (jj=jstart; jj<jend+1; jj++) {
1688             if (st == DMDA_STENCIL_BOX || !ii || !jj) {
1689               cols[cnt++] = slot + ii + gnx*jj;
1690             }
1691           }
1692         }
1693         ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1694         ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1695       }
1696     }
1697     ierr = PetscFree(values);CHKERRQ(ierr);
1698     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1699     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1700   }
1701   ierr = PetscFree(cols);CHKERRQ(ierr);
1702   PetscFunctionReturn(0);
1703 }
1704 
1705 #undef __FUNCT__
1706 #define __FUNCT__ "DMCreateMatrix_DA_3d_MPISBAIJ"
1707 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J)
1708 {
1709   PetscErrorCode         ierr;
1710   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1711   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1712   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1713   MPI_Comm               comm;
1714   PetscScalar            *values;
1715   DMDABoundaryType       bx,by,bz;
1716   DMDAStencilType        st;
1717   ISLocalToGlobalMapping ltog,ltogb;
1718 
1719   PetscFunctionBegin;
1720   /*
1721      nc - number of components per grid point
1722      col - number of colors needed in one direction for single component problem
1723   */
1724   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1725   col  = 2*s + 1;
1726 
1727   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1728   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1729   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1730 
1731   /* create the matrix */
1732   ierr = PetscMalloc(col*col*col*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1733 
1734   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1735   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1736 
1737   /* determine the matrix preallocation information */
1738   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1739   for (i=xs; i<xs+nx; i++) {
1740     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1741     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1742     for (j=ys; j<ys+ny; j++) {
1743       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1744       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1745       for (k=zs; k<zs+nz; k++) {
1746         kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1747         kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1748 
1749         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1750 
1751         /* Find block columns in block row */
1752         cnt = 0;
1753         for (ii=istart; ii<iend+1; ii++) {
1754           for (jj=jstart; jj<jend+1; jj++) {
1755             for (kk=kstart; kk<kend+1; kk++) {
1756               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
1757                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1758               }
1759             }
1760           }
1761         }
1762         ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1763         ierr = MatPreallocateSymmetricSet(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
1764       }
1765     }
1766   }
1767   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1768   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1769   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1770 
1771   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1772   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1773 
1774   /*
1775     For each node in the grid: we get the neighbors in the local (on processor ordering
1776     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1777     PETSc ordering.
1778   */
1779   if (!da->prealloc_only) {
1780     ierr = PetscMalloc(col*col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1781     ierr = PetscMemzero(values,col*col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1782     for (i=xs; i<xs+nx; i++) {
1783       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1784       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1785       for (j=ys; j<ys+ny; j++) {
1786         jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1787         jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1788         for (k=zs; k<zs+nz; k++) {
1789           kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1790           kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1791 
1792           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1793 
1794           cnt = 0;
1795           for (ii=istart; ii<iend+1; ii++) {
1796             for (jj=jstart; jj<jend+1; jj++) {
1797               for (kk=kstart; kk<kend+1; kk++) {
1798                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
1799                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1800                 }
1801               }
1802             }
1803           }
1804           ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1805           ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1806         }
1807       }
1808     }
1809     ierr = PetscFree(values);CHKERRQ(ierr);
1810     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1811     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1812   }
1813   ierr = PetscFree(cols);CHKERRQ(ierr);
1814   PetscFunctionReturn(0);
1815 }
1816 
1817 /* ---------------------------------------------------------------------------------*/
1818 
1819 #undef __FUNCT__
1820 #define __FUNCT__ "DMCreateMatrix_DA_3d_MPIAIJ_Fill"
1821 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J)
1822 {
1823   PetscErrorCode         ierr;
1824   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1825   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,l,p,*dnz,*onz;
1826   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1827   DM_DA                  *dd = (DM_DA*)da->data;
1828   PetscInt               ifill_col,*dfill = dd->dfill,*ofill = dd->ofill;
1829   MPI_Comm               comm;
1830   PetscScalar            *values;
1831   DMDABoundaryType       bx,by,bz;
1832   ISLocalToGlobalMapping ltog,ltogb;
1833   DMDAStencilType        st;
1834 
1835   PetscFunctionBegin;
1836   /*
1837          nc - number of components per grid point
1838          col - number of colors needed in one direction for single component problem
1839 
1840   */
1841   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1842   col  = 2*s + 1;
1843   if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
1844                  by 2*stencil_width + 1\n");
1845   if (by == DMDA_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
1846                  by 2*stencil_width + 1\n");
1847   if (bz == DMDA_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
1848                  by 2*stencil_width + 1\n");
1849 
1850   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1851   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1852   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1853 
1854   ierr = PetscMalloc(col*col*col*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1855   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1856   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1857 
1858   /* determine the matrix preallocation information */
1859   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1860 
1861 
1862   for (i=xs; i<xs+nx; i++) {
1863     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1864     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1865     for (j=ys; j<ys+ny; j++) {
1866       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1867       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1868       for (k=zs; k<zs+nz; k++) {
1869         kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1870         kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1871 
1872         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1873 
1874         for (l=0; l<nc; l++) {
1875           cnt = 0;
1876           for (ii=istart; ii<iend+1; ii++) {
1877             for (jj=jstart; jj<jend+1; jj++) {
1878               for (kk=kstart; kk<kend+1; kk++) {
1879                 if (ii || jj || kk) {
1880                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1881                     for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1882                   }
1883                 } else {
1884                   if (dfill) {
1885                     for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1886                   } else {
1887                     for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1888                   }
1889                 }
1890               }
1891             }
1892           }
1893           row  = l + nc*(slot);
1894           ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1895         }
1896       }
1897     }
1898   }
1899   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1900   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1901   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1902   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1903   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1904 
1905   /*
1906     For each node in the grid: we get the neighbors in the local (on processor ordering
1907     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1908     PETSc ordering.
1909   */
1910   if (!da->prealloc_only) {
1911     ierr = PetscMalloc(col*col*col*nc*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1912     ierr = PetscMemzero(values,col*col*col*nc*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1913     for (i=xs; i<xs+nx; i++) {
1914       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1915       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1916       for (j=ys; j<ys+ny; j++) {
1917         jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1918         jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1919         for (k=zs; k<zs+nz; k++) {
1920           kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1921           kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1922 
1923           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1924 
1925           for (l=0; l<nc; l++) {
1926             cnt = 0;
1927             for (ii=istart; ii<iend+1; ii++) {
1928               for (jj=jstart; jj<jend+1; jj++) {
1929                 for (kk=kstart; kk<kend+1; kk++) {
1930                   if (ii || jj || kk) {
1931                     if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1932                       for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1933                     }
1934                   } else {
1935                     if (dfill) {
1936                       for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1937                     } else {
1938                       for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1939                     }
1940                   }
1941                 }
1942               }
1943             }
1944             row  = l + nc*(slot);
1945             ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1946           }
1947         }
1948       }
1949     }
1950     ierr = PetscFree(values);CHKERRQ(ierr);
1951     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1952     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1953   }
1954   ierr = PetscFree(cols);CHKERRQ(ierr);
1955   PetscFunctionReturn(0);
1956 }
1957