xref: /petsc/src/dm/impls/da/fdda.c (revision 82f516cc2a80c5c0e712e5bfc0bf40989ffef740)
1 
2 #include <petsc-private/daimpl.h> /*I      "petscdmda.h"     I*/
3 #include <petscmat.h>
4 #include <petsc-private/matimpl.h>
5 
6 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring*);
7 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring*);
8 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring*);
9 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring*);
10 
11 /*
12    For ghost i that may be negative or greater than the upper bound this
13   maps it into the 0:m-1 range using periodicity
14 */
15 #define SetInRange(i,m) ((i < 0) ? m+i : ((i >= m) ? i-m : i))
16 
17 #undef __FUNCT__
18 #define __FUNCT__ "DMDASetBlockFills_Private"
19 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill,PetscInt w,PetscInt **rfill)
20 {
21   PetscErrorCode ierr;
22   PetscInt       i,j,nz,*fill;
23 
24   PetscFunctionBegin;
25   if (!dfill) PetscFunctionReturn(0);
26 
27   /* count number nonzeros */
28   nz = 0;
29   for (i=0; i<w; i++) {
30     for (j=0; j<w; j++) {
31       if (dfill[w*i+j]) nz++;
32     }
33   }
34   ierr = PetscMalloc((nz + w + 1)*sizeof(PetscInt),&fill);CHKERRQ(ierr);
35   /* construct modified CSR storage of nonzero structure */
36   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
37    so fill[1] - fill[0] gives number of nonzeros in first row etc */
38   nz = w + 1;
39   for (i=0; i<w; i++) {
40     fill[i] = nz;
41     for (j=0; j<w; j++) {
42       if (dfill[w*i+j]) {
43         fill[nz] = j;
44         nz++;
45       }
46     }
47   }
48   fill[w] = nz;
49 
50   *rfill = fill;
51   PetscFunctionReturn(0);
52 }
53 
54 #undef __FUNCT__
55 #define __FUNCT__ "DMDASetBlockFills"
56 /*@
57     DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
58     of the matrix returned by DMCreateMatrix().
59 
60     Logically Collective on DMDA
61 
62     Input Parameter:
63 +   da - the distributed array
64 .   dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
65 -   ofill - the fill pattern in the off-diagonal blocks
66 
67 
68     Level: developer
69 
70     Notes: This only makes sense when you are doing multicomponent problems but using the
71        MPIAIJ matrix format
72 
73            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
74        representing coupling and 0 entries for missing coupling. For example
75 $             dfill[9] = {1, 0, 0,
76 $                         1, 1, 0,
77 $                         0, 1, 1}
78        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
79        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
80        diagonal block).
81 
82      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
83      can be represented in the dfill, ofill format
84 
85    Contributed by Glenn Hammond
86 
87 .seealso DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()
88 
89 @*/
90 PetscErrorCode  DMDASetBlockFills(DM da,const PetscInt *dfill,const PetscInt *ofill)
91 {
92   DM_DA          *dd = (DM_DA*)da->data;
93   PetscErrorCode ierr;
94   PetscInt       i,k,cnt = 1;
95 
96   PetscFunctionBegin;
97   ierr = DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);CHKERRQ(ierr);
98   ierr = DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);CHKERRQ(ierr);
99 
100   /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of
101    columns to the left with any nonzeros in them plus 1 */
102   ierr = PetscMalloc(dd->w*sizeof(PetscBool),&dd->ofillcols);CHKERRQ(ierr);
103   ierr = PetscMemzero(dd->ofillcols,dd->w*sizeof(PetscBool));CHKERRQ(ierr);
104   for (i=0; i<dd->w; i++) {
105     for (k=dd->ofill[i]; k<dd->ofill[i+1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
106   }
107   for (i=0; i<dd->w; i++) {
108     if (dd->ofillcols[i]) {
109       dd->ofillcols[i] = cnt++;
110     }
111   }
112   PetscFunctionReturn(0);
113 }
114 
115 
116 #undef __FUNCT__
117 #define __FUNCT__ "DMCreateColoring_DA"
118 PetscErrorCode  DMCreateColoring_DA(DM da,ISColoringType ctype,MatType mtype,ISColoring *coloring)
119 {
120   PetscErrorCode   ierr;
121   PetscInt         dim,m,n,p,nc;
122   DMDABoundaryType bx,by,bz;
123   MPI_Comm         comm;
124   PetscMPIInt      size;
125   PetscBool        isBAIJ;
126   DM_DA            *dd = (DM_DA*)da->data;
127 
128   PetscFunctionBegin;
129   /*
130                                   m
131           ------------------------------------------------------
132          |                                                     |
133          |                                                     |
134          |               ----------------------                |
135          |               |                    |                |
136       n  |           yn  |                    |                |
137          |               |                    |                |
138          |               .---------------------                |
139          |             (xs,ys)     xn                          |
140          |            .                                        |
141          |         (gxs,gys)                                   |
142          |                                                     |
143           -----------------------------------------------------
144   */
145 
146   /*
147          nc - number of components per grid point
148          col - number of colors needed in one direction for single component problem
149 
150   */
151   ierr = DMDAGetInfo(da,&dim,0,0,0,&m,&n,&p,&nc,0,&bx,&by,&bz,0);CHKERRQ(ierr);
152 
153   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
154   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
155   if (ctype == IS_COLORING_GHOSTED) {
156     if (size == 1) {
157       ctype = IS_COLORING_GLOBAL;
158     } else if (dim > 1) {
159       if ((m==1 && bx == DMDA_BOUNDARY_PERIODIC) || (n==1 && by == DMDA_BOUNDARY_PERIODIC) || (p==1 && bz == DMDA_BOUNDARY_PERIODIC)) {
160         SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"IS_COLORING_GHOSTED cannot be used for periodic boundary condition having both ends of the domain  on the same process");
161       }
162     }
163   }
164 
165   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
166      matrices is for the blocks, not the individual matrix elements  */
167   ierr = PetscStrcmp(mtype,MATBAIJ,&isBAIJ);CHKERRQ(ierr);
168   if (!isBAIJ) {ierr = PetscStrcmp(mtype,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);}
169   if (!isBAIJ) {ierr = PetscStrcmp(mtype,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);}
170   if (isBAIJ) {
171     dd->w  = 1;
172     dd->xs = dd->xs/nc;
173     dd->xe = dd->xe/nc;
174     dd->Xs = dd->Xs/nc;
175     dd->Xe = dd->Xe/nc;
176   }
177 
178   /*
179      We do not provide a getcoloring function in the DMDA operations because
180    the basic DMDA does not know about matrices. We think of DMDA as being more
181    more low-level then matrices.
182   */
183   if (dim == 1) {
184     ierr = DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
185   } else if (dim == 2) {
186     ierr =  DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
187   } else if (dim == 3) {
188     ierr =  DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
189   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
190   if (isBAIJ) {
191     dd->w  = nc;
192     dd->xs = dd->xs*nc;
193     dd->xe = dd->xe*nc;
194     dd->Xs = dd->Xs*nc;
195     dd->Xe = dd->Xe*nc;
196   }
197   PetscFunctionReturn(0);
198 }
199 
200 /* ---------------------------------------------------------------------------------*/
201 
202 #undef __FUNCT__
203 #define __FUNCT__ "DMCreateColoring_DA_2d_MPIAIJ"
204 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
205 {
206   PetscErrorCode   ierr;
207   PetscInt         xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
208   PetscInt         ncolors;
209   MPI_Comm         comm;
210   DMDABoundaryType bx,by;
211   DMDAStencilType  st;
212   ISColoringValue  *colors;
213   DM_DA            *dd = (DM_DA*)da->data;
214 
215   PetscFunctionBegin;
216   /*
217          nc - number of components per grid point
218          col - number of colors needed in one direction for single component problem
219 
220   */
221   ierr = DMDAGetInfo(da,&dim,&m,&n,0,&M,&N,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
222   col  = 2*s + 1;
223   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
224   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
225   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
226 
227   /* special case as taught to us by Paul Hovland */
228   if (st == DMDA_STENCIL_STAR && s == 1) {
229     ierr = DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
230   } else {
231 
232     if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X (%d) is divisible\n\
233                                                             by 2*stencil_width + 1 (%d)\n", m, col);
234     if (by == DMDA_BOUNDARY_PERIODIC && (n % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y (%d) is divisible\n\
235                                                             by 2*stencil_width + 1 (%d)\n", n, col);
236     if (ctype == IS_COLORING_GLOBAL) {
237       if (!dd->localcoloring) {
238         ierr = PetscMalloc(nc*nx*ny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
239         ii   = 0;
240         for (j=ys; j<ys+ny; j++) {
241           for (i=xs; i<xs+nx; i++) {
242             for (k=0; k<nc; k++) {
243               colors[ii++] = k + nc*((i % col) + col*(j % col));
244             }
245           }
246         }
247         ncolors = nc + nc*(col-1 + col*(col-1));
248         ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,&dd->localcoloring);CHKERRQ(ierr);
249       }
250       *coloring = dd->localcoloring;
251     } else if (ctype == IS_COLORING_GHOSTED) {
252       if (!dd->ghostedcoloring) {
253         ierr = PetscMalloc(nc*gnx*gny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
254         ii   = 0;
255         for (j=gys; j<gys+gny; j++) {
256           for (i=gxs; i<gxs+gnx; i++) {
257             for (k=0; k<nc; k++) {
258               /* the complicated stuff is to handle periodic boundaries */
259               colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
260             }
261           }
262         }
263         ncolors = nc + nc*(col - 1 + col*(col-1));
264         ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
265         /* PetscIntView(ncolors,(PetscInt*)colors,0); */
266 
267         ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
268       }
269       *coloring = dd->ghostedcoloring;
270     } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
271   }
272   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
273   PetscFunctionReturn(0);
274 }
275 
276 /* ---------------------------------------------------------------------------------*/
277 
278 #undef __FUNCT__
279 #define __FUNCT__ "DMCreateColoring_DA_3d_MPIAIJ"
280 PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
281 {
282   PetscErrorCode   ierr;
283   PetscInt         xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
284   PetscInt         ncolors;
285   MPI_Comm         comm;
286   DMDABoundaryType bx,by,bz;
287   DMDAStencilType  st;
288   ISColoringValue  *colors;
289   DM_DA            *dd = (DM_DA*)da->data;
290 
291   PetscFunctionBegin;
292   /*
293          nc - number of components per grid point
294          col - number of colors needed in one direction for single component problem
295 
296   */
297   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
298   col  = 2*s + 1;
299   if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
300                                                          by 2*stencil_width + 1\n");
301   if (by == DMDA_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
302                                                          by 2*stencil_width + 1\n");
303   if (bz == DMDA_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
304                                                          by 2*stencil_width + 1\n");
305 
306   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
307   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
308   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
309 
310   /* create the coloring */
311   if (ctype == IS_COLORING_GLOBAL) {
312     if (!dd->localcoloring) {
313       ierr = PetscMalloc(nc*nx*ny*nz*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
314       ii   = 0;
315       for (k=zs; k<zs+nz; k++) {
316         for (j=ys; j<ys+ny; j++) {
317           for (i=xs; i<xs+nx; i++) {
318             for (l=0; l<nc; l++) {
319               colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
320             }
321           }
322         }
323       }
324       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
325       ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,&dd->localcoloring);CHKERRQ(ierr);
326     }
327     *coloring = dd->localcoloring;
328   } else if (ctype == IS_COLORING_GHOSTED) {
329     if (!dd->ghostedcoloring) {
330       ierr = PetscMalloc(nc*gnx*gny*gnz*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
331       ii   = 0;
332       for (k=gzs; k<gzs+gnz; k++) {
333         for (j=gys; j<gys+gny; j++) {
334           for (i=gxs; i<gxs+gnx; i++) {
335             for (l=0; l<nc; l++) {
336               /* the complicated stuff is to handle periodic boundaries */
337               colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
338             }
339           }
340         }
341       }
342       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
343       ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
344       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
345     }
346     *coloring = dd->ghostedcoloring;
347   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
348   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
349   PetscFunctionReturn(0);
350 }
351 
352 /* ---------------------------------------------------------------------------------*/
353 
354 #undef __FUNCT__
355 #define __FUNCT__ "DMCreateColoring_DA_1d_MPIAIJ"
356 PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
357 {
358   PetscErrorCode   ierr;
359   PetscInt         xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
360   PetscInt         ncolors;
361   MPI_Comm         comm;
362   DMDABoundaryType bx;
363   ISColoringValue  *colors;
364   DM_DA            *dd = (DM_DA*)da->data;
365 
366   PetscFunctionBegin;
367   /*
368          nc - number of components per grid point
369          col - number of colors needed in one direction for single component problem
370 
371   */
372   ierr = DMDAGetInfo(da,&dim,&m,0,0,&M,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
373   col  = 2*s + 1;
374 
375   if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points %d is divisible\n\
376                                                           by 2*stencil_width + 1 %d\n",(int)m,(int)col);
377 
378   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
379   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
380   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
381 
382   /* create the coloring */
383   if (ctype == IS_COLORING_GLOBAL) {
384     if (!dd->localcoloring) {
385       ierr = PetscMalloc(nc*nx*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
386       if (dd->ofillcols) {
387         PetscInt tc = 0;
388         for (i=0; i<nc; i++) tc += (PetscInt) (dd->ofillcols[i] > 0);
389         i1 = 0;
390         for (i=xs; i<xs+nx; i++) {
391           for (l=0; l<nc; l++) {
392             if (dd->ofillcols[l] && (i % col)) {
393               colors[i1++] =  nc - 1 + tc*((i % col) - 1) + dd->ofillcols[l];
394             } else {
395               colors[i1++] = l;
396             }
397           }
398         }
399         ncolors = nc + 2*s*tc;
400       } else {
401         i1 = 0;
402         for (i=xs; i<xs+nx; i++) {
403           for (l=0; l<nc; l++) {
404             colors[i1++] = l + nc*(i % col);
405           }
406         }
407         ncolors = nc + nc*(col-1);
408       }
409       ierr = ISColoringCreate(comm,ncolors,nc*nx,colors,&dd->localcoloring);CHKERRQ(ierr);
410     }
411     *coloring = dd->localcoloring;
412   } else if (ctype == IS_COLORING_GHOSTED) {
413     if (!dd->ghostedcoloring) {
414       ierr = PetscMalloc(nc*gnx*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
415       i1   = 0;
416       for (i=gxs; i<gxs+gnx; i++) {
417         for (l=0; l<nc; l++) {
418           /* the complicated stuff is to handle periodic boundaries */
419           colors[i1++] = l + nc*(SetInRange(i,m) % col);
420         }
421       }
422       ncolors = nc + nc*(col-1);
423       ierr    = ISColoringCreate(comm,ncolors,nc*gnx,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
424       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
425     }
426     *coloring = dd->ghostedcoloring;
427   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
428   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
429   PetscFunctionReturn(0);
430 }
431 
432 #undef __FUNCT__
433 #define __FUNCT__ "DMCreateColoring_DA_2d_5pt_MPIAIJ"
434 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
435 {
436   PetscErrorCode   ierr;
437   PetscInt         xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
438   PetscInt         ncolors;
439   MPI_Comm         comm;
440   DMDABoundaryType bx,by;
441   ISColoringValue  *colors;
442   DM_DA            *dd = (DM_DA*)da->data;
443 
444   PetscFunctionBegin;
445   /*
446          nc - number of components per grid point
447          col - number of colors needed in one direction for single component problem
448 
449   */
450   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,0);CHKERRQ(ierr);
451   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
452   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
453   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
454 
455   if (bx == DMDA_BOUNDARY_PERIODIC && (m % 5)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible by 5\n");
456   if (by == DMDA_BOUNDARY_PERIODIC && (n % 5)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible by 5\n");
457 
458   /* create the coloring */
459   if (ctype == IS_COLORING_GLOBAL) {
460     if (!dd->localcoloring) {
461       ierr = PetscMalloc(nc*nx*ny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
462       ii   = 0;
463       for (j=ys; j<ys+ny; j++) {
464         for (i=xs; i<xs+nx; i++) {
465           for (k=0; k<nc; k++) {
466             colors[ii++] = k + nc*((3*j+i) % 5);
467           }
468         }
469       }
470       ncolors = 5*nc;
471       ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,&dd->localcoloring);CHKERRQ(ierr);
472     }
473     *coloring = dd->localcoloring;
474   } else if (ctype == IS_COLORING_GHOSTED) {
475     if (!dd->ghostedcoloring) {
476       ierr = PetscMalloc(nc*gnx*gny*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
477       ii = 0;
478       for (j=gys; j<gys+gny; j++) {
479         for (i=gxs; i<gxs+gnx; i++) {
480           for (k=0; k<nc; k++) {
481             colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
482           }
483         }
484       }
485       ncolors = 5*nc;
486       ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,&dd->ghostedcoloring);CHKERRQ(ierr);
487       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_GHOSTED);CHKERRQ(ierr);
488     }
489     *coloring = dd->ghostedcoloring;
490   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
491   PetscFunctionReturn(0);
492 }
493 
494 /* =========================================================================== */
495 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat);
496 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM,Mat);
497 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat);
498 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat);
499 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat);
500 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat);
501 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat);
502 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat);
503 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat);
504 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat);
505 
506 #undef __FUNCT__
507 #define __FUNCT__ "MatSetupDM"
508 /*@C
509    MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix
510 
511    Logically Collective on Mat
512 
513    Input Parameters:
514 +  mat - the matrix
515 -  da - the da
516 
517    Level: intermediate
518 
519 @*/
520 PetscErrorCode MatSetupDM(Mat mat,DM da)
521 {
522   PetscErrorCode ierr;
523 
524   PetscFunctionBegin;
525   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
526   PetscValidHeaderSpecific(da,DM_CLASSID,1);
527   ierr = PetscTryMethod(mat,"MatSetupDM_C",(Mat,DM),(mat,da));CHKERRQ(ierr);
528   PetscFunctionReturn(0);
529 }
530 
531 EXTERN_C_BEGIN
532 #undef __FUNCT__
533 #define __FUNCT__ "MatView_MPI_DA"
534 PetscErrorCode  MatView_MPI_DA(Mat A,PetscViewer viewer)
535 {
536   DM                da;
537   PetscErrorCode    ierr;
538   const char        *prefix;
539   Mat               Anatural;
540   AO                ao;
541   PetscInt          rstart,rend,*petsc,i;
542   IS                is;
543   MPI_Comm          comm;
544   PetscViewerFormat format;
545 
546   PetscFunctionBegin;
547   /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
548   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
549   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0);
550 
551   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
552   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
553   if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
554 
555   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
556   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
557   ierr = PetscMalloc((rend-rstart)*sizeof(PetscInt),&petsc);CHKERRQ(ierr);
558   for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
559   ierr = AOApplicationToPetsc(ao,rend-rstart,petsc);CHKERRQ(ierr);
560   ierr = ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
561 
562   /* call viewer on natural ordering */
563   ierr = MatGetSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);CHKERRQ(ierr);
564   ierr = ISDestroy(&is);CHKERRQ(ierr);
565   ierr = PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);CHKERRQ(ierr);
566   ierr = PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);CHKERRQ(ierr);
567   ierr = PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);CHKERRQ(ierr);
568   ierr = MatView(Anatural,viewer);CHKERRQ(ierr);
569   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
570   PetscFunctionReturn(0);
571 }
572 EXTERN_C_END
573 
574 EXTERN_C_BEGIN
575 #undef __FUNCT__
576 #define __FUNCT__ "MatLoad_MPI_DA"
577 PetscErrorCode  MatLoad_MPI_DA(Mat A,PetscViewer viewer)
578 {
579   DM             da;
580   PetscErrorCode ierr;
581   Mat            Anatural,Aapp;
582   AO             ao;
583   PetscInt       rstart,rend,*app,i;
584   IS             is;
585   MPI_Comm       comm;
586 
587   PetscFunctionBegin;
588   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
589   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
590   if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
591 
592   /* Load the matrix in natural ordering */
593   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Anatural);CHKERRQ(ierr);
594   ierr = MatSetType(Anatural,((PetscObject)A)->type_name);CHKERRQ(ierr);
595   ierr = MatSetSizes(Anatural,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
596   ierr = MatLoad(Anatural,viewer);CHKERRQ(ierr);
597 
598   /* Map natural ordering to application ordering and create IS */
599   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
600   ierr = MatGetOwnershipRange(Anatural,&rstart,&rend);CHKERRQ(ierr);
601   ierr = PetscMalloc((rend-rstart)*sizeof(PetscInt),&app);CHKERRQ(ierr);
602   for (i=rstart; i<rend; i++) app[i-rstart] = i;
603   ierr = AOPetscToApplication(ao,rend-rstart,app);CHKERRQ(ierr);
604   ierr = ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
605 
606   /* Do permutation and replace header */
607   ierr = MatGetSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);CHKERRQ(ierr);
608   ierr = MatHeaderReplace(A,Aapp);CHKERRQ(ierr);
609   ierr = ISDestroy(&is);CHKERRQ(ierr);
610   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
611   PetscFunctionReturn(0);
612 }
613 EXTERN_C_END
614 
615 #undef __FUNCT__
616 #define __FUNCT__ "DMCreateMatrix_DA"
617 PetscErrorCode DMCreateMatrix_DA(DM da, MatType mtype,Mat *J)
618 {
619   PetscErrorCode ierr;
620   PetscInt       dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
621   Mat            A;
622   MPI_Comm       comm;
623   MatType        Atype;
624   PetscSection   section, sectionGlobal;
625   void           (*aij)(void)=NULL,(*baij)(void)=NULL,(*sbaij)(void)=NULL;
626   MatType        ttype[256];
627   PetscBool      flg;
628   PetscMPIInt    size;
629   DM_DA          *dd = (DM_DA*)da->data;
630 
631   PetscFunctionBegin;
632 #if !defined(PETSC_USE_DYNAMIC_LIBRARIES)
633   ierr = MatInitializePackage(NULL);CHKERRQ(ierr);
634 #endif
635   if (!mtype) mtype = MATAIJ;
636   ierr = PetscStrcpy((char*)ttype,mtype);CHKERRQ(ierr);
637   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)da),((PetscObject)da)->prefix,"DMDA options","Mat");CHKERRQ(ierr);
638   ierr = PetscOptionsList("-dm_mat_type","Matrix type","MatSetType",MatList,mtype,(char*)ttype,256,&flg);CHKERRQ(ierr);
639   ierr = PetscOptionsEnd();
640 
641   ierr = DMGetDefaultSection(da, &section);CHKERRQ(ierr);
642   if (section) {
643     PetscInt  bs = -1;
644     PetscInt  localSize;
645     PetscBool isShell, isBlock, isSeqBlock, isMPIBlock, isSymBlock, isSymSeqBlock, isSymMPIBlock, isSymmetric;
646 
647     ierr = DMGetDefaultGlobalSection(da, &sectionGlobal);CHKERRQ(ierr);
648     ierr = PetscSectionGetConstrainedStorageSize(sectionGlobal, &localSize);CHKERRQ(ierr);
649     ierr = MatCreate(PetscObjectComm((PetscObject)da), J);CHKERRQ(ierr);
650     ierr = MatSetSizes(*J, localSize, localSize, PETSC_DETERMINE, PETSC_DETERMINE);CHKERRQ(ierr);
651     ierr = MatSetType(*J, mtype);CHKERRQ(ierr);
652     ierr = MatSetFromOptions(*J);CHKERRQ(ierr);
653     ierr = PetscStrcmp(mtype, MATSHELL, &isShell);CHKERRQ(ierr);
654     ierr = PetscStrcmp(mtype, MATBAIJ, &isBlock);CHKERRQ(ierr);
655     ierr = PetscStrcmp(mtype, MATSEQBAIJ, &isSeqBlock);CHKERRQ(ierr);
656     ierr = PetscStrcmp(mtype, MATMPIBAIJ, &isMPIBlock);CHKERRQ(ierr);
657     ierr = PetscStrcmp(mtype, MATSBAIJ, &isSymBlock);CHKERRQ(ierr);
658     ierr = PetscStrcmp(mtype, MATSEQSBAIJ, &isSymSeqBlock);CHKERRQ(ierr);
659     ierr = PetscStrcmp(mtype, MATMPISBAIJ, &isSymMPIBlock);CHKERRQ(ierr);
660     /* Check for symmetric storage */
661     isSymmetric = (PetscBool) (isSymBlock || isSymSeqBlock || isSymMPIBlock);
662     if (isSymmetric) {
663       ierr = MatSetOption(*J, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE);CHKERRQ(ierr);
664     }
665     if (!isShell) {
666       PetscInt *dnz, *onz, *dnzu, *onzu, bsLocal;
667 
668       if (bs < 0) {
669         if (isBlock || isSeqBlock || isMPIBlock || isSymBlock || isSymSeqBlock || isSymMPIBlock) {
670           PetscInt pStart, pEnd, p, dof;
671 
672           ierr = PetscSectionGetChart(sectionGlobal, &pStart, &pEnd);CHKERRQ(ierr);
673           for (p = pStart; p < pEnd; ++p) {
674             ierr = PetscSectionGetDof(sectionGlobal, p, &dof);CHKERRQ(ierr);
675             if (dof) {
676               bs = dof;
677               break;
678             }
679           }
680         } else {
681           bs = 1;
682         }
683         /* Must have same blocksize on all procs (some might have no points) */
684         bsLocal = bs;
685         ierr    = MPI_Allreduce(&bsLocal, &bs, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)da));CHKERRQ(ierr);
686       }
687       ierr = PetscMalloc4(localSize/bs, PetscInt, &dnz, localSize/bs, PetscInt, &onz, localSize/bs, PetscInt, &dnzu, localSize/bs, PetscInt, &onzu);CHKERRQ(ierr);
688       ierr = PetscMemzero(dnz,  localSize/bs * sizeof(PetscInt));CHKERRQ(ierr);
689       ierr = PetscMemzero(onz,  localSize/bs * sizeof(PetscInt));CHKERRQ(ierr);
690       ierr = PetscMemzero(dnzu, localSize/bs * sizeof(PetscInt));CHKERRQ(ierr);
691       ierr = PetscMemzero(onzu, localSize/bs * sizeof(PetscInt));CHKERRQ(ierr);
692       /* ierr = DMPlexPreallocateOperator(dm, bs, section, sectionGlobal, dnz, onz, dnzu, onzu, *J, fillMatrix);CHKERRQ(ierr); */
693       ierr = PetscFree4(dnz, onz, dnzu, onzu);CHKERRQ(ierr);
694     }
695   }
696   /*
697                                   m
698           ------------------------------------------------------
699          |                                                     |
700          |                                                     |
701          |               ----------------------                |
702          |               |                    |                |
703       n  |           ny  |                    |                |
704          |               |                    |                |
705          |               .---------------------                |
706          |             (xs,ys)     nx                          |
707          |            .                                        |
708          |         (gxs,gys)                                   |
709          |                                                     |
710           -----------------------------------------------------
711   */
712 
713   /*
714          nc - number of components per grid point
715          col - number of colors needed in one direction for single component problem
716 
717   */
718   M   = dd->M;
719   N   = dd->N;
720   P   = dd->P;
721   dim = dd->dim;
722   dof = dd->w;
723   /* ierr = DMDAGetInfo(da,&dim,&M,&N,&P,0,0,0,&dof,0,0,0,0,0);CHKERRQ(ierr); */
724   ierr = DMDAGetCorners(da,0,0,0,&nx,&ny,&nz);CHKERRQ(ierr);
725   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
726   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
727   ierr = MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);CHKERRQ(ierr);
728   ierr = MatSetType(A,(MatType)ttype);CHKERRQ(ierr);
729   ierr = MatSetDM(A,da);CHKERRQ(ierr);
730   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
731   ierr = MatGetType(A,&Atype);CHKERRQ(ierr);
732   /*
733      We do not provide a getmatrix function in the DMDA operations because
734    the basic DMDA does not know about matrices. We think of DMDA as being more
735    more low-level than matrices. This is kind of cheating but, cause sometimes
736    we think of DMDA has higher level than matrices.
737 
738      We could switch based on Atype (or mtype), but we do not since the
739    specialized setting routines depend only the particular preallocation
740    details of the matrix, not the type itself.
741   */
742   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
743   if (!aij) {
744     ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
745   }
746   if (!aij) {
747     ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
748     if (!baij) {
749       ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
750     }
751     if (!baij) {
752       ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
753       if (!sbaij) {
754         ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
755       }
756     }
757   }
758   if (aij) {
759     if (dim == 1) {
760       if (dd->ofill) {
761         ierr = DMCreateMatrix_DA_1d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
762       } else {
763         ierr = DMCreateMatrix_DA_1d_MPIAIJ(da,A);CHKERRQ(ierr);
764       }
765     } else if (dim == 2) {
766       if (dd->ofill) {
767         ierr = DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
768       } else {
769         ierr = DMCreateMatrix_DA_2d_MPIAIJ(da,A);CHKERRQ(ierr);
770       }
771     } else if (dim == 3) {
772       if (dd->ofill) {
773         ierr = DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
774       } else {
775         ierr = DMCreateMatrix_DA_3d_MPIAIJ(da,A);CHKERRQ(ierr);
776       }
777     }
778   } else if (baij) {
779     if (dim == 2) {
780       ierr = DMCreateMatrix_DA_2d_MPIBAIJ(da,A);CHKERRQ(ierr);
781     } else if (dim == 3) {
782       ierr = DMCreateMatrix_DA_3d_MPIBAIJ(da,A);CHKERRQ(ierr);
783     } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
784   } else if (sbaij) {
785     if (dim == 2) {
786       ierr = DMCreateMatrix_DA_2d_MPISBAIJ(da,A);CHKERRQ(ierr);
787     } else if (dim == 3) {
788       ierr = DMCreateMatrix_DA_3d_MPISBAIJ(da,A);CHKERRQ(ierr);
789     } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
790   } else {
791     ISLocalToGlobalMapping ltog,ltogb;
792     ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
793     ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
794     ierr = MatSetUp(A);CHKERRQ(ierr);
795     ierr = MatSetLocalToGlobalMapping(A,ltog,ltog);CHKERRQ(ierr);
796     ierr = MatSetLocalToGlobalMappingBlock(A,ltogb,ltogb);CHKERRQ(ierr);
797   }
798   ierr = DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);CHKERRQ(ierr);
799   ierr = MatSetStencil(A,dim,dims,starts,dof);CHKERRQ(ierr);
800   ierr = MatSetDM(A,da);CHKERRQ(ierr);
801   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
802   if (size > 1) {
803     /* change viewer to display matrix in natural ordering */
804     ierr = MatShellSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA);CHKERRQ(ierr);
805     ierr = MatShellSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA);CHKERRQ(ierr);
806   }
807   *J = A;
808   PetscFunctionReturn(0);
809 }
810 
811 /* ---------------------------------------------------------------------------------*/
812 #undef __FUNCT__
813 #define __FUNCT__ "DMCreateMatrix_DA_2d_MPIAIJ"
814 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J)
815 {
816   PetscErrorCode         ierr;
817   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p;
818   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
819   MPI_Comm               comm;
820   PetscScalar            *values;
821   DMDABoundaryType       bx,by;
822   ISLocalToGlobalMapping ltog,ltogb;
823   DMDAStencilType        st;
824 
825   PetscFunctionBegin;
826   /*
827          nc - number of components per grid point
828          col - number of colors needed in one direction for single component problem
829 
830   */
831   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
832   col  = 2*s + 1;
833   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
834   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
835   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
836 
837   ierr = PetscMalloc2(nc,PetscInt,&rows,col*col*nc*nc,PetscInt,&cols);CHKERRQ(ierr);
838   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
839   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
840 
841   /* determine the matrix preallocation information */
842   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
843   for (i=xs; i<xs+nx; i++) {
844 
845     pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
846     pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
847 
848     for (j=ys; j<ys+ny; j++) {
849       slot = i - gxs + gnx*(j - gys);
850 
851       lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
852       lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
853 
854       cnt = 0;
855       for (k=0; k<nc; k++) {
856         for (l=lstart; l<lend+1; l++) {
857           for (p=pstart; p<pend+1; p++) {
858             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
859               cols[cnt++] = k + nc*(slot + gnx*l + p);
860             }
861           }
862         }
863         rows[k] = k + nc*(slot);
864       }
865       ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
866     }
867   }
868   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
869   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
870   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
871   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
872 
873   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
874   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
875 
876   /*
877     For each node in the grid: we get the neighbors in the local (on processor ordering
878     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
879     PETSc ordering.
880   */
881   if (!da->prealloc_only) {
882     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
883     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
884     for (i=xs; i<xs+nx; i++) {
885 
886       pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
887       pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
888 
889       for (j=ys; j<ys+ny; j++) {
890         slot = i - gxs + gnx*(j - gys);
891 
892         lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
893         lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
894 
895         cnt = 0;
896         for (k=0; k<nc; k++) {
897           for (l=lstart; l<lend+1; l++) {
898             for (p=pstart; p<pend+1; p++) {
899               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
900                 cols[cnt++] = k + nc*(slot + gnx*l + p);
901               }
902             }
903           }
904           rows[k] = k + nc*(slot);
905         }
906         ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
907       }
908     }
909     ierr = PetscFree(values);CHKERRQ(ierr);
910     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
911     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
912   }
913   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
914   PetscFunctionReturn(0);
915 }
916 
917 #undef __FUNCT__
918 #define __FUNCT__ "DMCreateMatrix_DA_2d_MPIAIJ_Fill"
919 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)
920 {
921   PetscErrorCode         ierr;
922   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
923   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,l,p;
924   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
925   DM_DA                  *dd = (DM_DA*)da->data;
926   PetscInt               ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
927   MPI_Comm               comm;
928   PetscScalar            *values;
929   DMDABoundaryType       bx,by;
930   ISLocalToGlobalMapping ltog,ltogb;
931   DMDAStencilType        st;
932 
933   PetscFunctionBegin;
934   /*
935          nc - number of components per grid point
936          col - number of colors needed in one direction for single component problem
937 
938   */
939   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
940   col  = 2*s + 1;
941   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
942   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
943   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
944 
945   ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
946   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
947   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
948 
949   /* determine the matrix preallocation information */
950   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
951   for (i=xs; i<xs+nx; i++) {
952 
953     pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
954     pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
955 
956     for (j=ys; j<ys+ny; j++) {
957       slot = i - gxs + gnx*(j - gys);
958 
959       lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
960       lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
961 
962       for (k=0; k<nc; k++) {
963         cnt = 0;
964         for (l=lstart; l<lend+1; l++) {
965           for (p=pstart; p<pend+1; p++) {
966             if (l || p) {
967               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
968                 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
969               }
970             } else {
971               if (dfill) {
972                 for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
973               } else {
974                 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
975               }
976             }
977           }
978         }
979         row  = k + nc*(slot);
980         ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
981       }
982     }
983   }
984   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
985   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
986   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
987   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
988   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
989 
990   /*
991     For each node in the grid: we get the neighbors in the local (on processor ordering
992     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
993     PETSc ordering.
994   */
995   if (!da->prealloc_only) {
996     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
997     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
998     for (i=xs; i<xs+nx; i++) {
999 
1000       pstart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1001       pend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1002 
1003       for (j=ys; j<ys+ny; j++) {
1004         slot = i - gxs + gnx*(j - gys);
1005 
1006         lstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1007         lend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1008 
1009         for (k=0; k<nc; k++) {
1010           cnt = 0;
1011           for (l=lstart; l<lend+1; l++) {
1012             for (p=pstart; p<pend+1; p++) {
1013               if (l || p) {
1014                 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
1015                   for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
1016                 }
1017               } else {
1018                 if (dfill) {
1019                   for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1020                 } else {
1021                   for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1022                 }
1023               }
1024             }
1025           }
1026           row  = k + nc*(slot);
1027           ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1028         }
1029       }
1030     }
1031     ierr = PetscFree(values);CHKERRQ(ierr);
1032     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1033     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1034   }
1035   ierr = PetscFree(cols);CHKERRQ(ierr);
1036   PetscFunctionReturn(0);
1037 }
1038 
1039 /* ---------------------------------------------------------------------------------*/
1040 
1041 #undef __FUNCT__
1042 #define __FUNCT__ "DMCreateMatrix_DA_3d_MPIAIJ"
1043 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J)
1044 {
1045   PetscErrorCode         ierr;
1046   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1047   PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
1048   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1049   MPI_Comm               comm;
1050   PetscScalar            *values;
1051   DMDABoundaryType       bx,by,bz;
1052   ISLocalToGlobalMapping ltog,ltogb;
1053   DMDAStencilType        st;
1054 
1055   PetscFunctionBegin;
1056   /*
1057          nc - number of components per grid point
1058          col - number of colors needed in one direction for single component problem
1059 
1060   */
1061   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1062   col  = 2*s + 1;
1063 
1064   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1065   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1066   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1067 
1068   ierr = PetscMalloc2(nc,PetscInt,&rows,col*col*col*nc*nc,PetscInt,&cols);CHKERRQ(ierr);
1069   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1070   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1071 
1072   /* determine the matrix preallocation information */
1073   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1074   for (i=xs; i<xs+nx; i++) {
1075     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1076     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1077     for (j=ys; j<ys+ny; j++) {
1078       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1079       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1080       for (k=zs; k<zs+nz; k++) {
1081         kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1082         kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1083 
1084         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1085 
1086         cnt = 0;
1087         for (l=0; l<nc; l++) {
1088           for (ii=istart; ii<iend+1; ii++) {
1089             for (jj=jstart; jj<jend+1; jj++) {
1090               for (kk=kstart; kk<kend+1; kk++) {
1091                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1092                   cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1093                 }
1094               }
1095             }
1096           }
1097           rows[l] = l + nc*(slot);
1098         }
1099         ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1100       }
1101     }
1102   }
1103   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1104   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1105   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1106   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1107   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1108   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1109 
1110   /*
1111     For each node in the grid: we get the neighbors in the local (on processor ordering
1112     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1113     PETSc ordering.
1114   */
1115   if (!da->prealloc_only) {
1116     ierr = PetscMalloc(col*col*col*nc*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1117     ierr = PetscMemzero(values,col*col*col*nc*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1118     for (i=xs; i<xs+nx; i++) {
1119       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1120       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1121       for (j=ys; j<ys+ny; j++) {
1122         jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1123         jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1124         for (k=zs; k<zs+nz; k++) {
1125           kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1126           kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1127 
1128           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1129 
1130           cnt = 0;
1131           for (l=0; l<nc; l++) {
1132             for (ii=istart; ii<iend+1; ii++) {
1133               for (jj=jstart; jj<jend+1; jj++) {
1134                 for (kk=kstart; kk<kend+1; kk++) {
1135                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1136                     cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1137                   }
1138                 }
1139               }
1140             }
1141             rows[l] = l + nc*(slot);
1142           }
1143           ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1144         }
1145       }
1146     }
1147     ierr = PetscFree(values);CHKERRQ(ierr);
1148     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1149     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1150   }
1151   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1152   PetscFunctionReturn(0);
1153 }
1154 
1155 /* ---------------------------------------------------------------------------------*/
1156 
1157 #undef __FUNCT__
1158 #define __FUNCT__ "DMCreateMatrix_DA_1d_MPIAIJ_Fill"
1159 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J)
1160 {
1161   PetscErrorCode         ierr;
1162   DM_DA                  *dd = (DM_DA*)da->data;
1163   PetscInt               xs,nx,i,j,gxs,gnx,row,k,l;
1164   PetscInt               m,dim,s,*cols = NULL,nc,col,cnt,*ocols;
1165   PetscInt               *ofill = dd->ofill;
1166   PetscScalar            *values;
1167   DMDABoundaryType       bx;
1168   ISLocalToGlobalMapping ltog,ltogb;
1169   PetscMPIInt            rank,size;
1170 
1171   PetscFunctionBegin;
1172   if (dd->bx == DMDA_BOUNDARY_PERIODIC) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"With fill provided not implemented with periodic boundary conditions");
1173   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)da),&rank);CHKERRQ(ierr);
1174   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRQ(ierr);
1175 
1176   /*
1177          nc - number of components per grid point
1178          col - number of colors needed in one direction for single component problem
1179 
1180   */
1181   ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
1182   col  = 2*s + 1;
1183 
1184   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
1185   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
1186 
1187   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1188   ierr = PetscMalloc2(nx*nc,PetscInt,&cols,nx*nc,PetscInt,&ocols);CHKERRQ(ierr);
1189   ierr = PetscMemzero(cols,nx*nc*sizeof(PetscInt));CHKERRQ(ierr);
1190   ierr = PetscMemzero(ocols,nx*nc*sizeof(PetscInt));CHKERRQ(ierr);
1191 
1192   /*
1193         note should be smaller for first and last process with no periodic
1194         does not handle dfill
1195   */
1196   cnt = 0;
1197   /* coupling with process to the left */
1198   for (i=0; i<s; i++) {
1199     for (j=0; j<nc; j++) {
1200       ocols[cnt] = ((!rank) ? 0 : (s - i)*(ofill[j+1] - ofill[j]));
1201       cols[cnt]  = nc + (s + i)*(ofill[j+1] - ofill[j]);
1202       cnt++;
1203     }
1204   }
1205   for (i=s; i<nx-s; i++) {
1206     for (j=0; j<nc; j++) {
1207       cols[cnt] = nc + 2*s*(ofill[j+1] - ofill[j]);
1208       cnt++;
1209     }
1210   }
1211   /* coupling with process to the right */
1212   for (i=nx-s; i<nx; i++) {
1213     for (j=0; j<nc; j++) {
1214       ocols[cnt] = ((rank == (size-1)) ? 0 : (i - nx + s + 1)*(ofill[j+1] - ofill[j]));
1215       cols[cnt]  = nc + (s + nx - i - 1)*(ofill[j+1] - ofill[j]);
1216       cnt++;
1217     }
1218   }
1219 
1220   ierr = MatSeqAIJSetPreallocation(J,0,cols);CHKERRQ(ierr);
1221   ierr = MatMPIAIJSetPreallocation(J,0,cols,0,ocols);CHKERRQ(ierr);
1222   ierr = PetscFree2(cols,ocols);CHKERRQ(ierr);
1223 
1224   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1225   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1226   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1227   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1228 
1229   /*
1230     For each node in the grid: we get the neighbors in the local (on processor ordering
1231     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1232     PETSc ordering.
1233   */
1234   if (!da->prealloc_only) {
1235     ierr = PetscMalloc(col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1236     ierr = PetscMalloc(col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1237     ierr = PetscMemzero(values,col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1238 
1239     row = xs*nc;
1240     /* coupling with process to the left */
1241     for (i=xs; i<xs+s; i++) {
1242       for (j=0; j<nc; j++) {
1243         cnt = 0;
1244         if (rank) {
1245           for (l=0; l<s; l++) {
1246             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1247           }
1248         }
1249         for (k=0; k<nc; k++) {
1250           cols[cnt++] = i*nc + k;
1251         }
1252         for (l=0; l<s; l++) {
1253           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1254         }
1255         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1256         row++;
1257       }
1258     }
1259     for (i=xs+s; i<xs+nx-s; i++) {
1260       for (j=0; j<nc; j++) {
1261         cnt = 0;
1262         for (l=0; l<s; l++) {
1263           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1264         }
1265         for (k=0; k<nc; k++) {
1266           cols[cnt++] = i*nc + k;
1267         }
1268         for (l=0; l<s; l++) {
1269           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1270         }
1271         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1272         row++;
1273       }
1274     }
1275     /* coupling with process to the right */
1276     for (i=xs+nx-s; i<xs+nx; i++) {
1277       for (j=0; j<nc; j++) {
1278         cnt = 0;
1279         for (l=0; l<s; l++) {
1280           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1281         }
1282         for (k=0; k<nc; k++) {
1283           cols[cnt++] = i*nc + k;
1284         }
1285         if (rank < size-1) {
1286           for (l=0; l<s; l++) {
1287             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1288           }
1289         }
1290         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1291         row++;
1292       }
1293     }
1294     ierr = PetscFree(values);CHKERRQ(ierr);
1295     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1296     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1297     ierr = PetscFree(cols);CHKERRQ(ierr);
1298   }
1299   PetscFunctionReturn(0);
1300 }
1301 
1302 /* ---------------------------------------------------------------------------------*/
1303 
1304 #undef __FUNCT__
1305 #define __FUNCT__ "DMCreateMatrix_DA_1d_MPIAIJ"
1306 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J)
1307 {
1308   PetscErrorCode         ierr;
1309   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1310   PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
1311   PetscInt               istart,iend;
1312   PetscScalar            *values;
1313   DMDABoundaryType       bx;
1314   ISLocalToGlobalMapping ltog,ltogb;
1315 
1316   PetscFunctionBegin;
1317   /*
1318          nc - number of components per grid point
1319          col - number of colors needed in one direction for single component problem
1320 
1321   */
1322   ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
1323   col  = 2*s + 1;
1324 
1325   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
1326   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
1327 
1328   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1329   ierr = MatSeqAIJSetPreallocation(J,col*nc,0);CHKERRQ(ierr);
1330   ierr = MatMPIAIJSetPreallocation(J,col*nc,0,col*nc,0);CHKERRQ(ierr);
1331 
1332   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1333   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1334   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1335   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1336 
1337   /*
1338     For each node in the grid: we get the neighbors in the local (on processor ordering
1339     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1340     PETSc ordering.
1341   */
1342   if (!da->prealloc_only) {
1343     ierr = PetscMalloc2(nc,PetscInt,&rows,col*nc*nc,PetscInt,&cols);CHKERRQ(ierr);
1344     ierr = PetscMalloc(col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1345     ierr = PetscMemzero(values,col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1346     for (i=xs; i<xs+nx; i++) {
1347       istart = PetscMax(-s,gxs - i);
1348       iend   = PetscMin(s,gxs + gnx - i - 1);
1349       slot   = i - gxs;
1350 
1351       cnt = 0;
1352       for (l=0; l<nc; l++) {
1353         for (i1=istart; i1<iend+1; i1++) {
1354           cols[cnt++] = l + nc*(slot + i1);
1355         }
1356         rows[l] = l + nc*(slot);
1357       }
1358       ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1359     }
1360     ierr = PetscFree(values);CHKERRQ(ierr);
1361     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1362     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1363     ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1364   }
1365   PetscFunctionReturn(0);
1366 }
1367 
1368 #undef __FUNCT__
1369 #define __FUNCT__ "DMCreateMatrix_DA_2d_MPIBAIJ"
1370 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
1371 {
1372   PetscErrorCode         ierr;
1373   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1374   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1375   PetscInt               istart,iend,jstart,jend,ii,jj;
1376   MPI_Comm               comm;
1377   PetscScalar            *values;
1378   DMDABoundaryType       bx,by;
1379   DMDAStencilType        st;
1380   ISLocalToGlobalMapping ltog,ltogb;
1381 
1382   PetscFunctionBegin;
1383   /*
1384      nc - number of components per grid point
1385      col - number of colors needed in one direction for single component problem
1386   */
1387   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1388   col  = 2*s + 1;
1389 
1390   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1391   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1392   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1393 
1394   ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1395 
1396   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1397   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1398 
1399   /* determine the matrix preallocation information */
1400   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1401   for (i=xs; i<xs+nx; i++) {
1402     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1403     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1404     for (j=ys; j<ys+ny; j++) {
1405       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1406       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1407       slot   = i - gxs + gnx*(j - gys);
1408 
1409       /* Find block columns in block row */
1410       cnt = 0;
1411       for (ii=istart; ii<iend+1; ii++) {
1412         for (jj=jstart; jj<jend+1; jj++) {
1413           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1414             cols[cnt++] = slot + ii + gnx*jj;
1415           }
1416         }
1417       }
1418       ierr = MatPreallocateSetLocal(ltogb,1,&slot,ltogb,cnt,cols,dnz,onz);CHKERRQ(ierr);
1419     }
1420   }
1421   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1422   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1423   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1424 
1425   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1426   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1427 
1428   /*
1429     For each node in the grid: we get the neighbors in the local (on processor ordering
1430     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1431     PETSc ordering.
1432   */
1433   if (!da->prealloc_only) {
1434     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1435     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1436     for (i=xs; i<xs+nx; i++) {
1437       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1438       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1439       for (j=ys; j<ys+ny; j++) {
1440         jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1441         jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1442         slot = i - gxs + gnx*(j - gys);
1443         cnt  = 0;
1444         for (ii=istart; ii<iend+1; ii++) {
1445           for (jj=jstart; jj<jend+1; jj++) {
1446             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1447               cols[cnt++] = slot + ii + gnx*jj;
1448             }
1449           }
1450         }
1451         ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1452       }
1453     }
1454     ierr = PetscFree(values);CHKERRQ(ierr);
1455     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1456     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1457   }
1458   ierr = PetscFree(cols);CHKERRQ(ierr);
1459   PetscFunctionReturn(0);
1460 }
1461 
1462 #undef __FUNCT__
1463 #define __FUNCT__ "DMCreateMatrix_DA_3d_MPIBAIJ"
1464 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
1465 {
1466   PetscErrorCode         ierr;
1467   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1468   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1469   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1470   MPI_Comm               comm;
1471   PetscScalar            *values;
1472   DMDABoundaryType       bx,by,bz;
1473   DMDAStencilType        st;
1474   ISLocalToGlobalMapping ltog,ltogb;
1475 
1476   PetscFunctionBegin;
1477   /*
1478          nc - number of components per grid point
1479          col - number of colors needed in one direction for single component problem
1480 
1481   */
1482   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1483   col  = 2*s + 1;
1484 
1485   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1486   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1487   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1488 
1489   ierr = PetscMalloc(col*col*col*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1490 
1491   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1492   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1493 
1494   /* determine the matrix preallocation information */
1495   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1496   for (i=xs; i<xs+nx; i++) {
1497     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1498     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1499     for (j=ys; j<ys+ny; j++) {
1500       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1501       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1502       for (k=zs; k<zs+nz; k++) {
1503         kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1504         kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1505 
1506         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1507 
1508         /* Find block columns in block row */
1509         cnt = 0;
1510         for (ii=istart; ii<iend+1; ii++) {
1511           for (jj=jstart; jj<jend+1; jj++) {
1512             for (kk=kstart; kk<kend+1; kk++) {
1513               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1514                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1515               }
1516             }
1517           }
1518         }
1519         ierr = MatPreallocateSetLocal(ltogb,1,&slot,ltogb,cnt,cols,dnz,onz);CHKERRQ(ierr);
1520       }
1521     }
1522   }
1523   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1524   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1525   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1526 
1527   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1528   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1529 
1530   /*
1531     For each node in the grid: we get the neighbors in the local (on processor ordering
1532     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1533     PETSc ordering.
1534   */
1535   if (!da->prealloc_only) {
1536     ierr = PetscMalloc(col*col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1537     ierr = PetscMemzero(values,col*col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1538     for (i=xs; i<xs+nx; i++) {
1539       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1540       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1541       for (j=ys; j<ys+ny; j++) {
1542         jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1543         jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1544         for (k=zs; k<zs+nz; k++) {
1545           kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1546           kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1547 
1548           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1549 
1550           cnt = 0;
1551           for (ii=istart; ii<iend+1; ii++) {
1552             for (jj=jstart; jj<jend+1; jj++) {
1553               for (kk=kstart; kk<kend+1; kk++) {
1554                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1555                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1556                 }
1557               }
1558             }
1559           }
1560           ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1561         }
1562       }
1563     }
1564     ierr = PetscFree(values);CHKERRQ(ierr);
1565     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1566     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1567   }
1568   ierr = PetscFree(cols);CHKERRQ(ierr);
1569   PetscFunctionReturn(0);
1570 }
1571 
1572 #undef __FUNCT__
1573 #define __FUNCT__ "L2GFilterUpperTriangular"
1574 /*
1575   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
1576   identify in the local ordering with periodic domain.
1577 */
1578 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[])
1579 {
1580   PetscErrorCode ierr;
1581   PetscInt       i,n;
1582 
1583   PetscFunctionBegin;
1584   ierr = ISLocalToGlobalMappingApply(ltog,1,row,row);CHKERRQ(ierr);
1585   ierr = ISLocalToGlobalMappingApply(ltog,*cnt,col,col);CHKERRQ(ierr);
1586   for (i=0,n=0; i<*cnt; i++) {
1587     if (col[i] >= *row) col[n++] = col[i];
1588   }
1589   *cnt = n;
1590   PetscFunctionReturn(0);
1591 }
1592 
1593 #undef __FUNCT__
1594 #define __FUNCT__ "DMCreateMatrix_DA_2d_MPISBAIJ"
1595 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J)
1596 {
1597   PetscErrorCode         ierr;
1598   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1599   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1600   PetscInt               istart,iend,jstart,jend,ii,jj;
1601   MPI_Comm               comm;
1602   PetscScalar            *values;
1603   DMDABoundaryType       bx,by;
1604   DMDAStencilType        st;
1605   ISLocalToGlobalMapping ltog,ltogb;
1606 
1607   PetscFunctionBegin;
1608   /*
1609      nc - number of components per grid point
1610      col - number of colors needed in one direction for single component problem
1611   */
1612   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1613   col  = 2*s + 1;
1614 
1615   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1616   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1617   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1618 
1619   ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1620 
1621   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1622   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1623 
1624   /* determine the matrix preallocation information */
1625   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1626   for (i=xs; i<xs+nx; i++) {
1627     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1628     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1629     for (j=ys; j<ys+ny; j++) {
1630       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1631       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1632       slot   = i - gxs + gnx*(j - gys);
1633 
1634       /* Find block columns in block row */
1635       cnt = 0;
1636       for (ii=istart; ii<iend+1; ii++) {
1637         for (jj=jstart; jj<jend+1; jj++) {
1638           if (st == DMDA_STENCIL_BOX || !ii || !jj) {
1639             cols[cnt++] = slot + ii + gnx*jj;
1640           }
1641         }
1642       }
1643       ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1644       ierr = MatPreallocateSymmetricSet(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
1645     }
1646   }
1647   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1648   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1649   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1650 
1651   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1652   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1653 
1654   /*
1655     For each node in the grid: we get the neighbors in the local (on processor ordering
1656     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1657     PETSc ordering.
1658   */
1659   if (!da->prealloc_only) {
1660     ierr = PetscMalloc(col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1661     ierr = PetscMemzero(values,col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1662     for (i=xs; i<xs+nx; i++) {
1663       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1664       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1665       for (j=ys; j<ys+ny; j++) {
1666         jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1667         jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1668         slot   = i - gxs + gnx*(j - gys);
1669 
1670         /* Find block columns in block row */
1671         cnt = 0;
1672         for (ii=istart; ii<iend+1; ii++) {
1673           for (jj=jstart; jj<jend+1; jj++) {
1674             if (st == DMDA_STENCIL_BOX || !ii || !jj) {
1675               cols[cnt++] = slot + ii + gnx*jj;
1676             }
1677           }
1678         }
1679         ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1680         ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1681       }
1682     }
1683     ierr = PetscFree(values);CHKERRQ(ierr);
1684     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1685     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1686   }
1687   ierr = PetscFree(cols);CHKERRQ(ierr);
1688   PetscFunctionReturn(0);
1689 }
1690 
1691 #undef __FUNCT__
1692 #define __FUNCT__ "DMCreateMatrix_DA_3d_MPISBAIJ"
1693 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J)
1694 {
1695   PetscErrorCode         ierr;
1696   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1697   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1698   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1699   MPI_Comm               comm;
1700   PetscScalar            *values;
1701   DMDABoundaryType       bx,by,bz;
1702   DMDAStencilType        st;
1703   ISLocalToGlobalMapping ltog,ltogb;
1704 
1705   PetscFunctionBegin;
1706   /*
1707      nc - number of components per grid point
1708      col - number of colors needed in one direction for single component problem
1709   */
1710   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1711   col  = 2*s + 1;
1712 
1713   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1714   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1715   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1716 
1717   /* create the matrix */
1718   ierr = PetscMalloc(col*col*col*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1719 
1720   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1721   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1722 
1723   /* determine the matrix preallocation information */
1724   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1725   for (i=xs; i<xs+nx; i++) {
1726     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1727     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1728     for (j=ys; j<ys+ny; j++) {
1729       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1730       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1731       for (k=zs; k<zs+nz; k++) {
1732         kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1733         kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1734 
1735         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1736 
1737         /* Find block columns in block row */
1738         cnt = 0;
1739         for (ii=istart; ii<iend+1; ii++) {
1740           for (jj=jstart; jj<jend+1; jj++) {
1741             for (kk=kstart; kk<kend+1; kk++) {
1742               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
1743                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1744               }
1745             }
1746           }
1747         }
1748         ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1749         ierr = MatPreallocateSymmetricSet(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
1750       }
1751     }
1752   }
1753   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1754   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1755   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1756 
1757   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1758   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1759 
1760   /*
1761     For each node in the grid: we get the neighbors in the local (on processor ordering
1762     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1763     PETSc ordering.
1764   */
1765   if (!da->prealloc_only) {
1766     ierr = PetscMalloc(col*col*col*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1767     ierr = PetscMemzero(values,col*col*col*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1768     for (i=xs; i<xs+nx; i++) {
1769       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1770       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1771       for (j=ys; j<ys+ny; j++) {
1772         jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1773         jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1774         for (k=zs; k<zs+nz; k++) {
1775           kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1776           kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1777 
1778           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1779 
1780           cnt = 0;
1781           for (ii=istart; ii<iend+1; ii++) {
1782             for (jj=jstart; jj<jend+1; jj++) {
1783               for (kk=kstart; kk<kend+1; kk++) {
1784                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
1785                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1786                 }
1787               }
1788             }
1789           }
1790           ierr = L2GFilterUpperTriangular(ltogb,&slot,&cnt,cols);CHKERRQ(ierr);
1791           ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1792         }
1793       }
1794     }
1795     ierr = PetscFree(values);CHKERRQ(ierr);
1796     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1797     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1798   }
1799   ierr = PetscFree(cols);CHKERRQ(ierr);
1800   PetscFunctionReturn(0);
1801 }
1802 
1803 /* ---------------------------------------------------------------------------------*/
1804 
1805 #undef __FUNCT__
1806 #define __FUNCT__ "DMCreateMatrix_DA_3d_MPIAIJ_Fill"
1807 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J)
1808 {
1809   PetscErrorCode         ierr;
1810   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1811   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,l,p,*dnz,*onz;
1812   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1813   DM_DA                  *dd = (DM_DA*)da->data;
1814   PetscInt               ifill_col,*dfill = dd->dfill,*ofill = dd->ofill;
1815   MPI_Comm               comm;
1816   PetscScalar            *values;
1817   DMDABoundaryType       bx,by,bz;
1818   ISLocalToGlobalMapping ltog,ltogb;
1819   DMDAStencilType        st;
1820 
1821   PetscFunctionBegin;
1822   /*
1823          nc - number of components per grid point
1824          col - number of colors needed in one direction for single component problem
1825 
1826   */
1827   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1828   col  = 2*s + 1;
1829   if (bx == DMDA_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
1830                  by 2*stencil_width + 1\n");
1831   if (by == DMDA_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
1832                  by 2*stencil_width + 1\n");
1833   if (bz == DMDA_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
1834                  by 2*stencil_width + 1\n");
1835 
1836   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1837   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1838   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1839 
1840   ierr = PetscMalloc(col*col*col*nc*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1841   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1842   ierr = DMGetLocalToGlobalMappingBlock(da,&ltogb);CHKERRQ(ierr);
1843 
1844   /* determine the matrix preallocation information */
1845   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1846 
1847 
1848   for (i=xs; i<xs+nx; i++) {
1849     istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1850     iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1851     for (j=ys; j<ys+ny; j++) {
1852       jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1853       jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1854       for (k=zs; k<zs+nz; k++) {
1855         kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1856         kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1857 
1858         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1859 
1860         for (l=0; l<nc; l++) {
1861           cnt = 0;
1862           for (ii=istart; ii<iend+1; ii++) {
1863             for (jj=jstart; jj<jend+1; jj++) {
1864               for (kk=kstart; kk<kend+1; kk++) {
1865                 if (ii || jj || kk) {
1866                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1867                     for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1868                   }
1869                 } else {
1870                   if (dfill) {
1871                     for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1872                   } else {
1873                     for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1874                   }
1875                 }
1876               }
1877             }
1878           }
1879           row  = l + nc*(slot);
1880           ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1881         }
1882       }
1883     }
1884   }
1885   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1886   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1887   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1888   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1889   ierr = MatSetLocalToGlobalMappingBlock(J,ltogb,ltogb);CHKERRQ(ierr);
1890 
1891   /*
1892     For each node in the grid: we get the neighbors in the local (on processor ordering
1893     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1894     PETSc ordering.
1895   */
1896   if (!da->prealloc_only) {
1897     ierr = PetscMalloc(col*col*col*nc*nc*nc*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1898     ierr = PetscMemzero(values,col*col*col*nc*nc*nc*sizeof(PetscScalar));CHKERRQ(ierr);
1899     for (i=xs; i<xs+nx; i++) {
1900       istart = (bx == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1901       iend   = (bx == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1902       for (j=ys; j<ys+ny; j++) {
1903         jstart = (by == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1904         jend   = (by == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1905         for (k=zs; k<zs+nz; k++) {
1906           kstart = (bz == DMDA_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1907           kend   = (bz == DMDA_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1908 
1909           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1910 
1911           for (l=0; l<nc; l++) {
1912             cnt = 0;
1913             for (ii=istart; ii<iend+1; ii++) {
1914               for (jj=jstart; jj<jend+1; jj++) {
1915                 for (kk=kstart; kk<kend+1; kk++) {
1916                   if (ii || jj || kk) {
1917                     if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1918                       for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1919                     }
1920                   } else {
1921                     if (dfill) {
1922                       for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1923                     } else {
1924                       for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1925                     }
1926                   }
1927                 }
1928               }
1929             }
1930             row  = l + nc*(slot);
1931             ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1932           }
1933         }
1934       }
1935     }
1936     ierr = PetscFree(values);CHKERRQ(ierr);
1937     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1938     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1939   }
1940   ierr = PetscFree(cols);CHKERRQ(ierr);
1941   PetscFunctionReturn(0);
1942 }
1943