xref: /petsc/src/dm/impls/da/fdda.c (revision 55e7fe800d976e85ed2b5cd8bfdef564daa37bd9)
1 
2 #include <petsc/private/dmdaimpl.h> /*I      "petscdmda.h"     I*/
3 #include <petscmat.h>
4 
5 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring*);
6 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring*);
7 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring*);
8 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring*);
9 
10 /*
11    For ghost i that may be negative or greater than the upper bound this
12   maps it into the 0:m-1 range using periodicity
13 */
14 #define SetInRange(i,m) ((i < 0) ? m+i : ((i >= m) ? i-m : i))
15 
16 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill,PetscInt w,PetscInt **rfill)
17 {
18   PetscErrorCode ierr;
19   PetscInt       i,j,nz,*fill;
20 
21   PetscFunctionBegin;
22   if (!dfill) PetscFunctionReturn(0);
23 
24   /* count number nonzeros */
25   nz = 0;
26   for (i=0; i<w; i++) {
27     for (j=0; j<w; j++) {
28       if (dfill[w*i+j]) nz++;
29     }
30   }
31   ierr = PetscMalloc1(nz + w + 1,&fill);CHKERRQ(ierr);
32   /* construct modified CSR storage of nonzero structure */
33   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
34    so fill[1] - fill[0] gives number of nonzeros in first row etc */
35   nz = w + 1;
36   for (i=0; i<w; i++) {
37     fill[i] = nz;
38     for (j=0; j<w; j++) {
39       if (dfill[w*i+j]) {
40         fill[nz] = j;
41         nz++;
42       }
43     }
44   }
45   fill[w] = nz;
46 
47   *rfill = fill;
48   PetscFunctionReturn(0);
49 }
50 
51 
52 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse,PetscInt w,PetscInt **rfill)
53 {
54   PetscErrorCode ierr;
55   PetscInt       nz;
56 
57   PetscFunctionBegin;
58   if (!dfillsparse) PetscFunctionReturn(0);
59 
60   /* Determine number of non-zeros */
61   nz = (dfillsparse[w] - w - 1);
62 
63   /* Allocate space for our copy of the given sparse matrix representation. */
64   ierr = PetscMalloc1(nz + w + 1,rfill);CHKERRQ(ierr);
65   ierr = PetscMemcpy(*rfill,dfillsparse,(nz+w+1)*sizeof(PetscInt));
66   PetscFunctionReturn(0);
67 }
68 
69 
70 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
71 {
72   PetscErrorCode ierr;
73   PetscInt       i,k,cnt = 1;
74 
75   PetscFunctionBegin;
76 
77   /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of
78    columns to the left with any nonzeros in them plus 1 */
79   ierr = PetscCalloc1(dd->w,&dd->ofillcols);CHKERRQ(ierr);
80   for (i=0; i<dd->w; i++) {
81     for (k=dd->ofill[i]; k<dd->ofill[i+1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
82   }
83   for (i=0; i<dd->w; i++) {
84     if (dd->ofillcols[i]) {
85       dd->ofillcols[i] = cnt++;
86     }
87   }
88   PetscFunctionReturn(0);
89 }
90 
91 
92 
93 /*@
94     DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
95     of the matrix returned by DMCreateMatrix().
96 
97     Logically Collective on DMDA
98 
99     Input Parameter:
100 +   da - the distributed array
101 .   dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
102 -   ofill - the fill pattern in the off-diagonal blocks
103 
104 
105     Level: developer
106 
107     Notes:
108     This only makes sense when you are doing multicomponent problems but using the
109        MPIAIJ matrix format
110 
111            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
112        representing coupling and 0 entries for missing coupling. For example
113 $             dfill[9] = {1, 0, 0,
114 $                         1, 1, 0,
115 $                         0, 1, 1}
116        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
117        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
118        diagonal block).
119 
120      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
121      can be represented in the dfill, ofill format
122 
123    Contributed by Glenn Hammond
124 
125 .seealso DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()
126 
127 @*/
128 PetscErrorCode  DMDASetBlockFills(DM da,const PetscInt *dfill,const PetscInt *ofill)
129 {
130   DM_DA          *dd = (DM_DA*)da->data;
131   PetscErrorCode ierr;
132 
133   PetscFunctionBegin;
134   /* save the given dfill and ofill information */
135   ierr = DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);CHKERRQ(ierr);
136   ierr = DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);CHKERRQ(ierr);
137 
138   /* count nonzeros in ofill columns */
139   ierr = DMDASetBlockFills_Private2(dd);CHKERRQ(ierr);
140 
141   PetscFunctionReturn(0);
142 }
143 
144 
145 /*@
146     DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
147     of the matrix returned by DMCreateMatrix(), using sparse representations
148     of fill patterns.
149 
150     Logically Collective on DMDA
151 
152     Input Parameter:
153 +   da - the distributed array
154 .   dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block)
155 -   ofill - the sparse fill pattern in the off-diagonal blocks
156 
157 
158     Level: developer
159 
160     Notes: This only makes sense when you are doing multicomponent problems but using the
161        MPIAIJ matrix format
162 
163            The format for dfill and ofill is a sparse representation of a
164            dof-by-dof matrix with 1 entries representing coupling and 0 entries
165            for missing coupling.  The sparse representation is a 1 dimensional
166            array of length nz + dof + 1, where nz is the number of non-zeros in
167            the matrix.  The first dof entries in the array give the
168            starting array indices of each row's items in the rest of the array,
169            the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
170            and the remaining nz items give the column indices of each of
171            the 1s within the logical 2D matrix.  Each row's items within
172            the array are the column indices of the 1s within that row
173            of the 2D matrix.  PETSc developers may recognize that this is the
174            same format as that computed by the DMDASetBlockFills_Private()
175            function from a dense 2D matrix representation.
176 
177      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
178      can be represented in the dfill, ofill format
179 
180    Contributed by Philip C. Roth
181 
182 .seealso DMDASetBlockFills(), DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()
183 
184 @*/
185 PetscErrorCode  DMDASetBlockFillsSparse(DM da,const PetscInt *dfillsparse,const PetscInt *ofillsparse)
186 {
187   DM_DA          *dd = (DM_DA*)da->data;
188   PetscErrorCode ierr;
189 
190   PetscFunctionBegin;
191   /* save the given dfill and ofill information */
192   ierr = DMDASetBlockFillsSparse_Private(dfillsparse,dd->w,&dd->dfill);CHKERRQ(ierr);
193   ierr = DMDASetBlockFillsSparse_Private(ofillsparse,dd->w,&dd->ofill);CHKERRQ(ierr);
194 
195   /* count nonzeros in ofill columns */
196   ierr = DMDASetBlockFills_Private2(dd);CHKERRQ(ierr);
197 
198   PetscFunctionReturn(0);
199 }
200 
201 
202 PetscErrorCode  DMCreateColoring_DA(DM da,ISColoringType ctype,ISColoring *coloring)
203 {
204   PetscErrorCode   ierr;
205   PetscInt         dim,m,n,p,nc;
206   DMBoundaryType bx,by,bz;
207   MPI_Comm         comm;
208   PetscMPIInt      size;
209   PetscBool        isBAIJ;
210   DM_DA            *dd = (DM_DA*)da->data;
211 
212   PetscFunctionBegin;
213   /*
214                                   m
215           ------------------------------------------------------
216          |                                                     |
217          |                                                     |
218          |               ----------------------                |
219          |               |                    |                |
220       n  |           yn  |                    |                |
221          |               |                    |                |
222          |               .---------------------                |
223          |             (xs,ys)     xn                          |
224          |            .                                        |
225          |         (gxs,gys)                                   |
226          |                                                     |
227           -----------------------------------------------------
228   */
229 
230   /*
231          nc - number of components per grid point
232          col - number of colors needed in one direction for single component problem
233 
234   */
235   ierr = DMDAGetInfo(da,&dim,0,0,0,&m,&n,&p,&nc,0,&bx,&by,&bz,0);CHKERRQ(ierr);
236 
237   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
238   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
239   if (ctype == IS_COLORING_LOCAL) {
240     if (size == 1) {
241       ctype = IS_COLORING_GLOBAL;
242     } else if (dim > 1) {
243       if ((m==1 && bx == DM_BOUNDARY_PERIODIC) || (n==1 && by == DM_BOUNDARY_PERIODIC) || (p==1 && bz == DM_BOUNDARY_PERIODIC)) {
244         SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain  on the same process");
245       }
246     }
247   }
248 
249   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
250      matrices is for the blocks, not the individual matrix elements  */
251   ierr = PetscStrcmp(da->mattype,MATBAIJ,&isBAIJ);CHKERRQ(ierr);
252   if (!isBAIJ) {ierr = PetscStrcmp(da->mattype,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);}
253   if (!isBAIJ) {ierr = PetscStrcmp(da->mattype,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);}
254   if (isBAIJ) {
255     dd->w  = 1;
256     dd->xs = dd->xs/nc;
257     dd->xe = dd->xe/nc;
258     dd->Xs = dd->Xs/nc;
259     dd->Xe = dd->Xe/nc;
260   }
261 
262   /*
263      We do not provide a getcoloring function in the DMDA operations because
264    the basic DMDA does not know about matrices. We think of DMDA as being more
265    more low-level then matrices.
266   */
267   if (dim == 1) {
268     ierr = DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
269   } else if (dim == 2) {
270     ierr =  DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
271   } else if (dim == 3) {
272     ierr =  DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
273   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
274   if (isBAIJ) {
275     dd->w  = nc;
276     dd->xs = dd->xs*nc;
277     dd->xe = dd->xe*nc;
278     dd->Xs = dd->Xs*nc;
279     dd->Xe = dd->Xe*nc;
280   }
281   PetscFunctionReturn(0);
282 }
283 
284 /* ---------------------------------------------------------------------------------*/
285 
286 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
287 {
288   PetscErrorCode   ierr;
289   PetscInt         xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
290   PetscInt         ncolors;
291   MPI_Comm         comm;
292   DMBoundaryType bx,by;
293   DMDAStencilType  st;
294   ISColoringValue  *colors;
295   DM_DA            *dd = (DM_DA*)da->data;
296 
297   PetscFunctionBegin;
298   /*
299          nc - number of components per grid point
300          col - number of colors needed in one direction for single component problem
301 
302   */
303   ierr = DMDAGetInfo(da,&dim,&m,&n,0,&M,&N,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
304   col  = 2*s + 1;
305   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
306   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
307   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
308 
309   /* special case as taught to us by Paul Hovland */
310   if (st == DMDA_STENCIL_STAR && s == 1) {
311     ierr = DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
312   } else {
313 
314     if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X (%d) is divisible\n\
315                                                             by 2*stencil_width + 1 (%d)\n", m, col);
316     if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y (%d) is divisible\n\
317                                                             by 2*stencil_width + 1 (%d)\n", n, col);
318     if (ctype == IS_COLORING_GLOBAL) {
319       if (!dd->localcoloring) {
320         ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr);
321         ii   = 0;
322         for (j=ys; j<ys+ny; j++) {
323           for (i=xs; i<xs+nx; i++) {
324             for (k=0; k<nc; k++) {
325               colors[ii++] = k + nc*((i % col) + col*(j % col));
326             }
327           }
328         }
329         ncolors = nc + nc*(col-1 + col*(col-1));
330         ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
331       }
332       *coloring = dd->localcoloring;
333     } else if (ctype == IS_COLORING_LOCAL) {
334       if (!dd->ghostedcoloring) {
335         ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr);
336         ii   = 0;
337         for (j=gys; j<gys+gny; j++) {
338           for (i=gxs; i<gxs+gnx; i++) {
339             for (k=0; k<nc; k++) {
340               /* the complicated stuff is to handle periodic boundaries */
341               colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
342             }
343           }
344         }
345         ncolors = nc + nc*(col - 1 + col*(col-1));
346         ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
347         /* PetscIntView(ncolors,(PetscInt*)colors,0); */
348 
349         ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
350       }
351       *coloring = dd->ghostedcoloring;
352     } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
353   }
354   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
355   PetscFunctionReturn(0);
356 }
357 
358 /* ---------------------------------------------------------------------------------*/
359 
360 PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
361 {
362   PetscErrorCode   ierr;
363   PetscInt         xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
364   PetscInt         ncolors;
365   MPI_Comm         comm;
366   DMBoundaryType bx,by,bz;
367   DMDAStencilType  st;
368   ISColoringValue  *colors;
369   DM_DA            *dd = (DM_DA*)da->data;
370 
371   PetscFunctionBegin;
372   /*
373          nc - number of components per grid point
374          col - number of colors needed in one direction for single component problem
375 
376   */
377   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
378   col  = 2*s + 1;
379   if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
380                                                          by 2*stencil_width + 1\n");
381   if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
382                                                          by 2*stencil_width + 1\n");
383   if (bz == DM_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
384                                                          by 2*stencil_width + 1\n");
385 
386   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
387   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
388   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
389 
390   /* create the coloring */
391   if (ctype == IS_COLORING_GLOBAL) {
392     if (!dd->localcoloring) {
393       ierr = PetscMalloc1(nc*nx*ny*nz,&colors);CHKERRQ(ierr);
394       ii   = 0;
395       for (k=zs; k<zs+nz; k++) {
396         for (j=ys; j<ys+ny; j++) {
397           for (i=xs; i<xs+nx; i++) {
398             for (l=0; l<nc; l++) {
399               colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
400             }
401           }
402         }
403       }
404       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
405       ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
406     }
407     *coloring = dd->localcoloring;
408   } else if (ctype == IS_COLORING_LOCAL) {
409     if (!dd->ghostedcoloring) {
410       ierr = PetscMalloc1(nc*gnx*gny*gnz,&colors);CHKERRQ(ierr);
411       ii   = 0;
412       for (k=gzs; k<gzs+gnz; k++) {
413         for (j=gys; j<gys+gny; j++) {
414           for (i=gxs; i<gxs+gnx; i++) {
415             for (l=0; l<nc; l++) {
416               /* the complicated stuff is to handle periodic boundaries */
417               colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
418             }
419           }
420         }
421       }
422       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
423       ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
424       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
425     }
426     *coloring = dd->ghostedcoloring;
427   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
428   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
429   PetscFunctionReturn(0);
430 }
431 
432 /* ---------------------------------------------------------------------------------*/
433 
434 PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
435 {
436   PetscErrorCode   ierr;
437   PetscInt         xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
438   PetscInt         ncolors;
439   MPI_Comm         comm;
440   DMBoundaryType bx;
441   ISColoringValue  *colors;
442   DM_DA            *dd = (DM_DA*)da->data;
443 
444   PetscFunctionBegin;
445   /*
446          nc - number of components per grid point
447          col - number of colors needed in one direction for single component problem
448 
449   */
450   ierr = DMDAGetInfo(da,&dim,&m,0,0,&M,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
451   col  = 2*s + 1;
452 
453   if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points %d is divisible\n\
454                                                           by 2*stencil_width + 1 %d\n",(int)m,(int)col);
455 
456   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
457   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
458   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
459 
460   /* create the coloring */
461   if (ctype == IS_COLORING_GLOBAL) {
462     if (!dd->localcoloring) {
463       ierr = PetscMalloc1(nc*nx,&colors);CHKERRQ(ierr);
464       if (dd->ofillcols) {
465         PetscInt tc = 0;
466         for (i=0; i<nc; i++) tc += (PetscInt) (dd->ofillcols[i] > 0);
467         i1 = 0;
468         for (i=xs; i<xs+nx; i++) {
469           for (l=0; l<nc; l++) {
470             if (dd->ofillcols[l] && (i % col)) {
471               colors[i1++] =  nc - 1 + tc*((i % col) - 1) + dd->ofillcols[l];
472             } else {
473               colors[i1++] = l;
474             }
475           }
476         }
477         ncolors = nc + 2*s*tc;
478       } else {
479         i1 = 0;
480         for (i=xs; i<xs+nx; i++) {
481           for (l=0; l<nc; l++) {
482             colors[i1++] = l + nc*(i % col);
483           }
484         }
485         ncolors = nc + nc*(col-1);
486       }
487       ierr = ISColoringCreate(comm,ncolors,nc*nx,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
488     }
489     *coloring = dd->localcoloring;
490   } else if (ctype == IS_COLORING_LOCAL) {
491     if (!dd->ghostedcoloring) {
492       ierr = PetscMalloc1(nc*gnx,&colors);CHKERRQ(ierr);
493       i1   = 0;
494       for (i=gxs; i<gxs+gnx; i++) {
495         for (l=0; l<nc; l++) {
496           /* the complicated stuff is to handle periodic boundaries */
497           colors[i1++] = l + nc*(SetInRange(i,m) % col);
498         }
499       }
500       ncolors = nc + nc*(col-1);
501       ierr    = ISColoringCreate(comm,ncolors,nc*gnx,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
502       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
503     }
504     *coloring = dd->ghostedcoloring;
505   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
506   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
507   PetscFunctionReturn(0);
508 }
509 
510 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
511 {
512   PetscErrorCode   ierr;
513   PetscInt         xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
514   PetscInt         ncolors;
515   MPI_Comm         comm;
516   DMBoundaryType bx,by;
517   ISColoringValue  *colors;
518   DM_DA            *dd = (DM_DA*)da->data;
519 
520   PetscFunctionBegin;
521   /*
522          nc - number of components per grid point
523          col - number of colors needed in one direction for single component problem
524 
525   */
526   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,0);CHKERRQ(ierr);
527   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
528   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
529   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
530 
531   if (bx == DM_BOUNDARY_PERIODIC && (m % 5)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible by 5\n");
532   if (by == DM_BOUNDARY_PERIODIC && (n % 5)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible by 5\n");
533 
534   /* create the coloring */
535   if (ctype == IS_COLORING_GLOBAL) {
536     if (!dd->localcoloring) {
537       ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr);
538       ii   = 0;
539       for (j=ys; j<ys+ny; j++) {
540         for (i=xs; i<xs+nx; i++) {
541           for (k=0; k<nc; k++) {
542             colors[ii++] = k + nc*((3*j+i) % 5);
543           }
544         }
545       }
546       ncolors = 5*nc;
547       ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
548     }
549     *coloring = dd->localcoloring;
550   } else if (ctype == IS_COLORING_LOCAL) {
551     if (!dd->ghostedcoloring) {
552       ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr);
553       ii = 0;
554       for (j=gys; j<gys+gny; j++) {
555         for (i=gxs; i<gxs+gnx; i++) {
556           for (k=0; k<nc; k++) {
557             colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
558           }
559         }
560       }
561       ncolors = 5*nc;
562       ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
563       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
564     }
565     *coloring = dd->ghostedcoloring;
566   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
567   PetscFunctionReturn(0);
568 }
569 
570 /* =========================================================================== */
571 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat);
572 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM,Mat);
573 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat);
574 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat);
575 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat);
576 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat);
577 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat);
578 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat);
579 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat);
580 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat);
581 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM,Mat);
582 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM,Mat);
583 extern PetscErrorCode DMCreateMatrix_DA_IS(DM,Mat);
584 
585 /*@C
586    MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix
587 
588    Logically Collective on Mat
589 
590    Input Parameters:
591 +  mat - the matrix
592 -  da - the da
593 
594    Level: intermediate
595 
596 @*/
597 PetscErrorCode MatSetupDM(Mat mat,DM da)
598 {
599   PetscErrorCode ierr;
600 
601   PetscFunctionBegin;
602   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
603   PetscValidHeaderSpecificType(da,DM_CLASSID,1,DMDA);
604   ierr = PetscTryMethod(mat,"MatSetupDM_C",(Mat,DM),(mat,da));CHKERRQ(ierr);
605   PetscFunctionReturn(0);
606 }
607 
608 PetscErrorCode  MatView_MPI_DA(Mat A,PetscViewer viewer)
609 {
610   DM                da;
611   PetscErrorCode    ierr;
612   const char        *prefix;
613   Mat               Anatural;
614   AO                ao;
615   PetscInt          rstart,rend,*petsc,i;
616   IS                is;
617   MPI_Comm          comm;
618   PetscViewerFormat format;
619 
620   PetscFunctionBegin;
621   /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
622   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
623   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0);
624 
625   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
626   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
627   if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
628 
629   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
630   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
631   ierr = PetscMalloc1(rend-rstart,&petsc);CHKERRQ(ierr);
632   for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
633   ierr = AOApplicationToPetsc(ao,rend-rstart,petsc);CHKERRQ(ierr);
634   ierr = ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
635 
636   /* call viewer on natural ordering */
637   ierr = MatCreateSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);CHKERRQ(ierr);
638   ierr = ISDestroy(&is);CHKERRQ(ierr);
639   ierr = PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);CHKERRQ(ierr);
640   ierr = PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);CHKERRQ(ierr);
641   ierr = PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);CHKERRQ(ierr);
642   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
643   ierr = MatView(Anatural,viewer);CHKERRQ(ierr);
644   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
645   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
646   PetscFunctionReturn(0);
647 }
648 
649 PetscErrorCode  MatLoad_MPI_DA(Mat A,PetscViewer viewer)
650 {
651   DM             da;
652   PetscErrorCode ierr;
653   Mat            Anatural,Aapp;
654   AO             ao;
655   PetscInt       rstart,rend,*app,i,m,n,M,N;
656   IS             is;
657   MPI_Comm       comm;
658 
659   PetscFunctionBegin;
660   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
661   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
662   if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
663 
664   /* Load the matrix in natural ordering */
665   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Anatural);CHKERRQ(ierr);
666   ierr = MatSetType(Anatural,((PetscObject)A)->type_name);CHKERRQ(ierr);
667   ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr);
668   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
669   ierr = MatSetSizes(Anatural,m,n,M,N);CHKERRQ(ierr);
670   ierr = MatLoad(Anatural,viewer);CHKERRQ(ierr);
671 
672   /* Map natural ordering to application ordering and create IS */
673   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
674   ierr = MatGetOwnershipRange(Anatural,&rstart,&rend);CHKERRQ(ierr);
675   ierr = PetscMalloc1(rend-rstart,&app);CHKERRQ(ierr);
676   for (i=rstart; i<rend; i++) app[i-rstart] = i;
677   ierr = AOPetscToApplication(ao,rend-rstart,app);CHKERRQ(ierr);
678   ierr = ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
679 
680   /* Do permutation and replace header */
681   ierr = MatCreateSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);CHKERRQ(ierr);
682   ierr = MatHeaderReplace(A,&Aapp);CHKERRQ(ierr);
683   ierr = ISDestroy(&is);CHKERRQ(ierr);
684   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
685   PetscFunctionReturn(0);
686 }
687 
688 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
689 {
690   PetscErrorCode ierr;
691   PetscInt       dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
692   Mat            A;
693   MPI_Comm       comm;
694   MatType        Atype;
695   PetscSection   section, sectionGlobal;
696   void           (*aij)(void)=NULL,(*baij)(void)=NULL,(*sbaij)(void)=NULL,(*sell)(void)=NULL,(*is)(void)=NULL;
697   MatType        mtype;
698   PetscMPIInt    size;
699   DM_DA          *dd = (DM_DA*)da->data;
700 
701   PetscFunctionBegin;
702   ierr = MatInitializePackage();CHKERRQ(ierr);
703   mtype = da->mattype;
704 
705   ierr = DMGetSection(da, &section);CHKERRQ(ierr);
706   if (section) {
707     PetscInt  bs = -1;
708     PetscInt  localSize;
709     PetscBool isShell, isBlock, isSeqBlock, isMPIBlock, isSymBlock, isSymSeqBlock, isSymMPIBlock, isSymmetric;
710 
711     ierr = DMGetGlobalSection(da, &sectionGlobal);CHKERRQ(ierr);
712     ierr = PetscSectionGetConstrainedStorageSize(sectionGlobal, &localSize);CHKERRQ(ierr);
713     ierr = MatCreate(PetscObjectComm((PetscObject)da),&A);CHKERRQ(ierr);
714     ierr = MatSetSizes(A,localSize,localSize,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
715     ierr = MatSetType(A,mtype);CHKERRQ(ierr);
716     ierr = PetscStrcmp(mtype,MATSHELL,&isShell);CHKERRQ(ierr);
717     ierr = PetscStrcmp(mtype,MATBAIJ,&isBlock);CHKERRQ(ierr);
718     ierr = PetscStrcmp(mtype,MATSEQBAIJ,&isSeqBlock);CHKERRQ(ierr);
719     ierr = PetscStrcmp(mtype,MATMPIBAIJ,&isMPIBlock);CHKERRQ(ierr);
720     ierr = PetscStrcmp(mtype,MATSBAIJ,&isSymBlock);CHKERRQ(ierr);
721     ierr = PetscStrcmp(mtype,MATSEQSBAIJ,&isSymSeqBlock);CHKERRQ(ierr);
722     ierr = PetscStrcmp(mtype,MATMPISBAIJ,&isSymMPIBlock);CHKERRQ(ierr);
723     /* Check for symmetric storage */
724     isSymmetric = (PetscBool) (isSymBlock || isSymSeqBlock || isSymMPIBlock);
725     if (isSymmetric) {
726       ierr = MatSetOption(*J, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE);CHKERRQ(ierr);
727     }
728     if (!isShell) {
729       PetscInt *dnz, *onz, *dnzu, *onzu, bsLocal;
730 
731       if (bs < 0) {
732         if (isBlock || isSeqBlock || isMPIBlock || isSymBlock || isSymSeqBlock || isSymMPIBlock) {
733           PetscInt pStart, pEnd, p, dof;
734 
735           ierr = PetscSectionGetChart(sectionGlobal, &pStart, &pEnd);CHKERRQ(ierr);
736           for (p = pStart; p < pEnd; ++p) {
737             ierr = PetscSectionGetDof(sectionGlobal, p, &dof);CHKERRQ(ierr);
738             if (dof) {
739               bs = dof;
740               break;
741             }
742           }
743         } else {
744           bs = 1;
745         }
746         /* Must have same blocksize on all procs (some might have no points) */
747         bsLocal = bs;
748         ierr    = MPIU_Allreduce(&bsLocal, &bs, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)da));CHKERRQ(ierr);
749       }
750       ierr = PetscCalloc4(localSize/bs, &dnz, localSize/bs, &onz, localSize/bs, &dnzu, localSize/bs, &onzu);CHKERRQ(ierr);
751       /* ierr = DMPlexPreallocateOperator(dm, bs, section, sectionGlobal, dnz, onz, dnzu, onzu, *J, fillMatrix);CHKERRQ(ierr); */
752       ierr = PetscFree4(dnz, onz, dnzu, onzu);CHKERRQ(ierr);
753     }
754   }
755   /*
756                                   m
757           ------------------------------------------------------
758          |                                                     |
759          |                                                     |
760          |               ----------------------                |
761          |               |                    |                |
762       n  |           ny  |                    |                |
763          |               |                    |                |
764          |               .---------------------                |
765          |             (xs,ys)     nx                          |
766          |            .                                        |
767          |         (gxs,gys)                                   |
768          |                                                     |
769           -----------------------------------------------------
770   */
771 
772   /*
773          nc - number of components per grid point
774          col - number of colors needed in one direction for single component problem
775 
776   */
777   M   = dd->M;
778   N   = dd->N;
779   P   = dd->P;
780   dim = da->dim;
781   dof = dd->w;
782   /* ierr = DMDAGetInfo(da,&dim,&M,&N,&P,0,0,0,&dof,0,0,0,0,0);CHKERRQ(ierr); */
783   ierr = DMDAGetCorners(da,0,0,0,&nx,&ny,&nz);CHKERRQ(ierr);
784   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
785   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
786   ierr = MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);CHKERRQ(ierr);
787   ierr = MatSetType(A,mtype);CHKERRQ(ierr);
788   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
789   ierr = MatSetDM(A,da);CHKERRQ(ierr);
790   if (da->structure_only) {
791     ierr = MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE);CHKERRQ(ierr);
792   }
793   ierr = MatGetType(A,&Atype);CHKERRQ(ierr);
794   /*
795      We do not provide a getmatrix function in the DMDA operations because
796    the basic DMDA does not know about matrices. We think of DMDA as being more
797    more low-level than matrices. This is kind of cheating but, cause sometimes
798    we think of DMDA has higher level than matrices.
799 
800      We could switch based on Atype (or mtype), but we do not since the
801    specialized setting routines depend only on the particular preallocation
802    details of the matrix, not the type itself.
803   */
804   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
805   if (!aij) {
806     ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
807   }
808   if (!aij) {
809     ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
810     if (!baij) {
811       ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
812     }
813     if (!baij) {
814       ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
815       if (!sbaij) {
816         ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
817       }
818       if (!sbaij) {
819         ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISELLSetPreallocation_C",&sell);CHKERRQ(ierr);
820         if (!sell) {
821           ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",&sell);CHKERRQ(ierr);
822         }
823       }
824       if (!sell) {
825         ierr = PetscObjectQueryFunction((PetscObject)A,"MatISSetPreallocation_C",&is);CHKERRQ(ierr);
826       }
827     }
828   }
829   if (aij) {
830     if (dim == 1) {
831       if (dd->ofill) {
832         ierr = DMCreateMatrix_DA_1d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
833       } else {
834         ierr = DMCreateMatrix_DA_1d_MPIAIJ(da,A);CHKERRQ(ierr);
835       }
836     } else if (dim == 2) {
837       if (dd->ofill) {
838         ierr = DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
839       } else {
840         ierr = DMCreateMatrix_DA_2d_MPIAIJ(da,A);CHKERRQ(ierr);
841       }
842     } else if (dim == 3) {
843       if (dd->ofill) {
844         ierr = DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
845       } else {
846         ierr = DMCreateMatrix_DA_3d_MPIAIJ(da,A);CHKERRQ(ierr);
847       }
848     }
849   } else if (baij) {
850     if (dim == 2) {
851       ierr = DMCreateMatrix_DA_2d_MPIBAIJ(da,A);CHKERRQ(ierr);
852     } else if (dim == 3) {
853       ierr = DMCreateMatrix_DA_3d_MPIBAIJ(da,A);CHKERRQ(ierr);
854     } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
855   } else if (sbaij) {
856     if (dim == 2) {
857       ierr = DMCreateMatrix_DA_2d_MPISBAIJ(da,A);CHKERRQ(ierr);
858     } else if (dim == 3) {
859       ierr = DMCreateMatrix_DA_3d_MPISBAIJ(da,A);CHKERRQ(ierr);
860     } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
861   } else if (sell) {
862      if (dim == 2) {
863        ierr = DMCreateMatrix_DA_2d_MPISELL(da,A);CHKERRQ(ierr);
864      } else if (dim == 3) {
865        ierr = DMCreateMatrix_DA_3d_MPISELL(da,A);CHKERRQ(ierr);
866      } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
867   } else if (is) {
868     ierr = DMCreateMatrix_DA_IS(da,A);CHKERRQ(ierr);
869   } else {
870     ISLocalToGlobalMapping ltog;
871 
872     ierr = MatSetBlockSize(A,dof);CHKERRQ(ierr);
873     ierr = MatSetUp(A);CHKERRQ(ierr);
874     ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
875     ierr = MatSetLocalToGlobalMapping(A,ltog,ltog);CHKERRQ(ierr);
876   }
877   ierr = DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);CHKERRQ(ierr);
878   ierr = MatSetStencil(A,dim,dims,starts,dof);CHKERRQ(ierr);
879   ierr = MatSetDM(A,da);CHKERRQ(ierr);
880   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
881   if (size > 1) {
882     /* change viewer to display matrix in natural ordering */
883     ierr = MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA);CHKERRQ(ierr);
884     ierr = MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA);CHKERRQ(ierr);
885   }
886   *J = A;
887   PetscFunctionReturn(0);
888 }
889 
890 /* ---------------------------------------------------------------------------------*/
891 PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
892 
893 PetscErrorCode DMCreateMatrix_DA_IS(DM dm,Mat J)
894 {
895   DM_DA                  *da = (DM_DA*)dm->data;
896   Mat                    lJ;
897   ISLocalToGlobalMapping ltog;
898   IS                     is_loc_filt, is_glob;
899   const PetscInt         *e_loc,*idx;
900   PetscInt               nel,nen,nv,dof,dim,*gidx,nb;
901   PetscBool              flg;
902   PetscErrorCode         ierr;
903 
904   /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
905      We need to filter the local indices that are represented through the DMDAGetElements decomposition
906      This is because the size of the local matrices in MATIS is the local size of the l2g map */
907   PetscFunctionBegin;
908   dof  = da->w;
909   dim  = dm->dim;
910 
911   ierr = MatSetBlockSize(J,dof);CHKERRQ(ierr);
912 
913   /* get local elements indices in local DMDA numbering */
914   ierr = DMDAGetElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
915   ierr = ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nel*nen,e_loc,PETSC_COPY_VALUES,&is_loc_filt);CHKERRQ(ierr);
916   ierr = DMDARestoreElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr);
917 
918   /* obtain a consistent local ordering for MATIS */
919   ierr = ISSortRemoveDups(is_loc_filt);CHKERRQ(ierr);
920   ierr = ISBlockGetLocalSize(is_loc_filt,&nb);CHKERRQ(ierr);
921   ierr = DMGetLocalToGlobalMapping(dm,&ltog);CHKERRQ(ierr);
922   ierr = ISLocalToGlobalMappingGetSize(ltog,&nv);CHKERRQ(ierr);
923   ierr = PetscMalloc1(PetscMax(nb,nv/dof),&gidx);CHKERRQ(ierr);
924   ierr = ISBlockGetIndices(is_loc_filt,&idx);CHKERRQ(ierr);
925   ierr = ISLocalToGlobalMappingApplyBlock(ltog,nb,idx,gidx);CHKERRQ(ierr);
926   ierr = ISBlockRestoreIndices(is_loc_filt,&idx);CHKERRQ(ierr);
927   ierr = ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nb,gidx,PETSC_USE_POINTER,&is_glob);CHKERRQ(ierr);
928   ierr = ISLocalToGlobalMappingCreateIS(is_glob,&ltog);CHKERRQ(ierr);
929   ierr = ISDestroy(&is_glob);CHKERRQ(ierr);
930   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
931   ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
932 
933   /* We also attach a l2g map to the local matrices to have MatSetValueLocal to work */
934   ierr = MatISGetLocalMat(J,&lJ);CHKERRQ(ierr);
935   ierr = ISLocalToGlobalMappingCreateIS(is_loc_filt,&ltog);CHKERRQ(ierr);
936   ierr = ISDestroy(&is_loc_filt);CHKERRQ(ierr);
937   ierr = ISCreateStride(PetscObjectComm((PetscObject)lJ),nv/dof,0,1,&is_glob);CHKERRQ(ierr);
938   ierr = ISGetIndices(is_glob,&idx);CHKERRQ(ierr);
939   ierr = ISGlobalToLocalMappingApplyBlock(ltog,IS_GTOLM_MASK,nv/dof,idx,&nb,gidx);CHKERRQ(ierr);
940   ierr = ISRestoreIndices(is_glob,&idx);CHKERRQ(ierr);
941   ierr = ISDestroy(&is_glob);CHKERRQ(ierr);
942   ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
943   ierr = ISCreateBlock(PETSC_COMM_SELF,dof,nb,gidx,PETSC_USE_POINTER,&is_loc_filt);CHKERRQ(ierr);
944   ierr = ISLocalToGlobalMappingCreateIS(is_loc_filt,&ltog);CHKERRQ(ierr);
945   ierr = ISDestroy(&is_loc_filt);CHKERRQ(ierr);
946   ierr = MatSetLocalToGlobalMapping(lJ,ltog,ltog);CHKERRQ(ierr);
947   ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
948   ierr = PetscFree(gidx);CHKERRQ(ierr);
949 
950   /* Preallocation (not exact): we reuse the preallocation routines of the assembled version  */
951   flg = dm->prealloc_only;
952   dm->prealloc_only = PETSC_TRUE;
953   switch (dim) {
954   case 1:
955     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr);
956     ierr = DMCreateMatrix_DA_1d_MPIAIJ(dm,J);CHKERRQ(ierr);
957     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
958     break;
959   case 2:
960     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr);
961     ierr = DMCreateMatrix_DA_2d_MPIAIJ(dm,J);CHKERRQ(ierr);
962     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
963     break;
964   case 3:
965     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr);
966     ierr = DMCreateMatrix_DA_3d_MPIAIJ(dm,J);CHKERRQ(ierr);
967     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
968     break;
969   default:
970     SETERRQ1(PetscObjectComm((PetscObject)dm),PETSC_ERR_SUP,"Unhandled dimension %d",dim);
971     break;
972   }
973   dm->prealloc_only = flg;
974   PetscFunctionReturn(0);
975 }
976 
977 PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da,Mat J)
978 {
979   PetscErrorCode         ierr;
980   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p;
981   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
982   MPI_Comm               comm;
983   PetscScalar            *values;
984   DMBoundaryType         bx,by;
985   ISLocalToGlobalMapping ltog;
986   DMDAStencilType        st;
987 
988   PetscFunctionBegin;
989   /*
990          nc - number of components per grid point
991          col - number of colors needed in one direction for single component problem
992 
993   */
994   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
995   col  = 2*s + 1;
996   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
997   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
998   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
999 
1000   ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr);
1001   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1002 
1003   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1004   /* determine the matrix preallocation information */
1005   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
1006   for (i=xs; i<xs+nx; i++) {
1007 
1008     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1009     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1010 
1011     for (j=ys; j<ys+ny; j++) {
1012       slot = i - gxs + gnx*(j - gys);
1013 
1014       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1015       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1016 
1017       cnt = 0;
1018       for (k=0; k<nc; k++) {
1019         for (l=lstart; l<lend+1; l++) {
1020           for (p=pstart; p<pend+1; p++) {
1021             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1022               cols[cnt++] = k + nc*(slot + gnx*l + p);
1023             }
1024           }
1025         }
1026         rows[k] = k + nc*(slot);
1027       }
1028       ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1029     }
1030   }
1031   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1032   ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1033   ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1034   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1035 
1036   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1037 
1038   /*
1039     For each node in the grid: we get the neighbors in the local (on processor ordering
1040     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1041     PETSc ordering.
1042   */
1043   if (!da->prealloc_only) {
1044     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
1045     for (i=xs; i<xs+nx; i++) {
1046 
1047       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1048       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1049 
1050       for (j=ys; j<ys+ny; j++) {
1051         slot = i - gxs + gnx*(j - gys);
1052 
1053         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1054         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1055 
1056         cnt = 0;
1057         for (k=0; k<nc; k++) {
1058           for (l=lstart; l<lend+1; l++) {
1059             for (p=pstart; p<pend+1; p++) {
1060               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1061                 cols[cnt++] = k + nc*(slot + gnx*l + p);
1062               }
1063             }
1064           }
1065           rows[k] = k + nc*(slot);
1066         }
1067         ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1068       }
1069     }
1070     ierr = PetscFree(values);CHKERRQ(ierr);
1071     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1072     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1073     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1074   }
1075   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1076   PetscFunctionReturn(0);
1077 }
1078 
1079 PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da,Mat J)
1080 {
1081   PetscErrorCode         ierr;
1082   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1083   PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
1084   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
1085   MPI_Comm               comm;
1086   PetscScalar            *values;
1087   DMBoundaryType         bx,by,bz;
1088   ISLocalToGlobalMapping ltog;
1089   DMDAStencilType        st;
1090 
1091   PetscFunctionBegin;
1092   /*
1093          nc - number of components per grid point
1094          col - number of colors needed in one direction for single component problem
1095 
1096   */
1097   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1098   col  = 2*s + 1;
1099   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1100   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1101   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1102 
1103   ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr);
1104   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1105 
1106   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1107   /* determine the matrix preallocation information */
1108   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1109   for (i=xs; i<xs+nx; i++) {
1110     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1111     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1112     for (j=ys; j<ys+ny; j++) {
1113       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1114       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1115       for (k=zs; k<zs+nz; k++) {
1116         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1117         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1118 
1119         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1120 
1121         cnt = 0;
1122         for (l=0; l<nc; l++) {
1123           for (ii=istart; ii<iend+1; ii++) {
1124             for (jj=jstart; jj<jend+1; jj++) {
1125               for (kk=kstart; kk<kend+1; kk++) {
1126                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1127                   cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1128                 }
1129               }
1130             }
1131           }
1132           rows[l] = l + nc*(slot);
1133         }
1134         ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1135       }
1136     }
1137   }
1138   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1139   ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1140   ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1141   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1142   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1143 
1144   /*
1145     For each node in the grid: we get the neighbors in the local (on processor ordering
1146     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1147     PETSc ordering.
1148   */
1149   if (!da->prealloc_only) {
1150     ierr = PetscCalloc1(col*col*col*nc*nc*nc,&values);CHKERRQ(ierr);
1151     for (i=xs; i<xs+nx; i++) {
1152       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1153       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1154       for (j=ys; j<ys+ny; j++) {
1155         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1156         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1157         for (k=zs; k<zs+nz; k++) {
1158           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1159           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1160 
1161           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1162 
1163           cnt = 0;
1164           for (l=0; l<nc; l++) {
1165             for (ii=istart; ii<iend+1; ii++) {
1166               for (jj=jstart; jj<jend+1; jj++) {
1167                 for (kk=kstart; kk<kend+1; kk++) {
1168                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1169                     cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1170                   }
1171                 }
1172               }
1173             }
1174             rows[l] = l + nc*(slot);
1175           }
1176           ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1177         }
1178       }
1179     }
1180     ierr = PetscFree(values);CHKERRQ(ierr);
1181     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1182     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1183     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1184   }
1185   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1186   PetscFunctionReturn(0);
1187 }
1188 
1189 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J)
1190 {
1191   PetscErrorCode         ierr;
1192   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,M,N;
1193   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
1194   MPI_Comm               comm;
1195   PetscScalar            *values;
1196   DMBoundaryType         bx,by;
1197   ISLocalToGlobalMapping ltog,mltog;
1198   DMDAStencilType        st;
1199   PetscBool              removedups = PETSC_FALSE;
1200 
1201   PetscFunctionBegin;
1202   /*
1203          nc - number of components per grid point
1204          col - number of colors needed in one direction for single component problem
1205 
1206   */
1207   ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1208   col  = 2*s + 1;
1209   /*
1210        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1211        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1212   */
1213   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1214   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1215   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1216   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1217   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1218 
1219   ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr);
1220   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1221 
1222   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1223   /* determine the matrix preallocation information */
1224   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
1225   for (i=xs; i<xs+nx; i++) {
1226 
1227     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1228     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1229 
1230     for (j=ys; j<ys+ny; j++) {
1231       slot = i - gxs + gnx*(j - gys);
1232 
1233       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1234       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1235 
1236       cnt = 0;
1237       for (k=0; k<nc; k++) {
1238         for (l=lstart; l<lend+1; l++) {
1239           for (p=pstart; p<pend+1; p++) {
1240             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1241               cols[cnt++] = k + nc*(slot + gnx*l + p);
1242             }
1243           }
1244         }
1245         rows[k] = k + nc*(slot);
1246       }
1247       if (removedups) {
1248         ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1249       } else {
1250         ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1251       }
1252     }
1253   }
1254   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1255   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1256   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1257   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1258   ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
1259   if (!mltog) {
1260     ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1261   }
1262 
1263   /*
1264     For each node in the grid: we get the neighbors in the local (on processor ordering
1265     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1266     PETSc ordering.
1267   */
1268   if (!da->prealloc_only) {
1269     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
1270     for (i=xs; i<xs+nx; i++) {
1271 
1272       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1273       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1274 
1275       for (j=ys; j<ys+ny; j++) {
1276         slot = i - gxs + gnx*(j - gys);
1277 
1278         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1279         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1280 
1281         cnt = 0;
1282         for (k=0; k<nc; k++) {
1283           for (l=lstart; l<lend+1; l++) {
1284             for (p=pstart; p<pend+1; p++) {
1285               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1286                 cols[cnt++] = k + nc*(slot + gnx*l + p);
1287               }
1288             }
1289           }
1290           rows[k] = k + nc*(slot);
1291         }
1292         ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1293       }
1294     }
1295     ierr = PetscFree(values);CHKERRQ(ierr);
1296     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1297     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1298     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1299   }
1300   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1301   PetscFunctionReturn(0);
1302 }
1303 
1304 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)
1305 {
1306   PetscErrorCode         ierr;
1307   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1308   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,maxcnt = 0,l,p,M,N;
1309   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
1310   DM_DA                  *dd = (DM_DA*)da->data;
1311   PetscInt               ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
1312   MPI_Comm               comm;
1313   PetscScalar            *values;
1314   DMBoundaryType         bx,by;
1315   ISLocalToGlobalMapping ltog;
1316   DMDAStencilType        st;
1317   PetscBool              removedups = PETSC_FALSE;
1318 
1319   PetscFunctionBegin;
1320   /*
1321          nc - number of components per grid point
1322          col - number of colors needed in one direction for single component problem
1323 
1324   */
1325   ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1326   col  = 2*s + 1;
1327   /*
1328        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1329        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1330   */
1331   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1332   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1333   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1334   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1335   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1336 
1337   ierr = PetscMalloc1(col*col*nc,&cols);CHKERRQ(ierr);
1338   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1339 
1340   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1341   /* determine the matrix preallocation information */
1342   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
1343   for (i=xs; i<xs+nx; i++) {
1344 
1345     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1346     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1347 
1348     for (j=ys; j<ys+ny; j++) {
1349       slot = i - gxs + gnx*(j - gys);
1350 
1351       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1352       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1353 
1354       for (k=0; k<nc; k++) {
1355         cnt = 0;
1356         for (l=lstart; l<lend+1; l++) {
1357           for (p=pstart; p<pend+1; p++) {
1358             if (l || p) {
1359               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
1360                 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
1361               }
1362             } else {
1363               if (dfill) {
1364                 for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1365               } else {
1366                 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1367               }
1368             }
1369           }
1370         }
1371         row    = k + nc*(slot);
1372         maxcnt = PetscMax(maxcnt,cnt);
1373         if (removedups) {
1374           ierr   = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1375         } else {
1376           ierr   = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1377         }
1378       }
1379     }
1380   }
1381   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1382   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1383   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1384   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1385 
1386   /*
1387     For each node in the grid: we get the neighbors in the local (on processor ordering
1388     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1389     PETSc ordering.
1390   */
1391   if (!da->prealloc_only) {
1392     ierr = PetscCalloc1(maxcnt,&values);CHKERRQ(ierr);
1393     for (i=xs; i<xs+nx; i++) {
1394 
1395       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1396       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1397 
1398       for (j=ys; j<ys+ny; j++) {
1399         slot = i - gxs + gnx*(j - gys);
1400 
1401         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1402         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1403 
1404         for (k=0; k<nc; k++) {
1405           cnt = 0;
1406           for (l=lstart; l<lend+1; l++) {
1407             for (p=pstart; p<pend+1; p++) {
1408               if (l || p) {
1409                 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
1410                   for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
1411                 }
1412               } else {
1413                 if (dfill) {
1414                   for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1415                 } else {
1416                   for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1417                 }
1418               }
1419             }
1420           }
1421           row  = k + nc*(slot);
1422           ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1423         }
1424       }
1425     }
1426     ierr = PetscFree(values);CHKERRQ(ierr);
1427     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1428     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1429     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1430   }
1431   ierr = PetscFree(cols);CHKERRQ(ierr);
1432   PetscFunctionReturn(0);
1433 }
1434 
1435 /* ---------------------------------------------------------------------------------*/
1436 
1437 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J)
1438 {
1439   PetscErrorCode         ierr;
1440   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1441   PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
1442   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
1443   MPI_Comm               comm;
1444   PetscScalar            *values;
1445   DMBoundaryType         bx,by,bz;
1446   ISLocalToGlobalMapping ltog,mltog;
1447   DMDAStencilType        st;
1448   PetscBool              removedups = PETSC_FALSE;
1449 
1450   PetscFunctionBegin;
1451   /*
1452          nc - number of components per grid point
1453          col - number of colors needed in one direction for single component problem
1454 
1455   */
1456   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1457   col  = 2*s + 1;
1458 
1459   /*
1460        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1461        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1462   */
1463   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1464   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1465   if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;
1466 
1467   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1468   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1469   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1470 
1471   ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr);
1472   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1473 
1474   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1475   /* determine the matrix preallocation information */
1476   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1477   for (i=xs; i<xs+nx; i++) {
1478     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1479     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1480     for (j=ys; j<ys+ny; j++) {
1481       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1482       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1483       for (k=zs; k<zs+nz; k++) {
1484         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1485         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1486 
1487         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1488 
1489         cnt = 0;
1490         for (l=0; l<nc; l++) {
1491           for (ii=istart; ii<iend+1; ii++) {
1492             for (jj=jstart; jj<jend+1; jj++) {
1493               for (kk=kstart; kk<kend+1; kk++) {
1494                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1495                   cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1496                 }
1497               }
1498             }
1499           }
1500           rows[l] = l + nc*(slot);
1501         }
1502         if (removedups) {
1503           ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1504         } else {
1505           ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1506         }
1507       }
1508     }
1509   }
1510   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1511   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1512   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1513   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1514   ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
1515   if (!mltog) {
1516     ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1517   }
1518 
1519   /*
1520     For each node in the grid: we get the neighbors in the local (on processor ordering
1521     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1522     PETSc ordering.
1523   */
1524   if (!da->prealloc_only) {
1525     ierr = PetscCalloc1(col*col*col*nc*nc*nc,&values);CHKERRQ(ierr);
1526     for (i=xs; i<xs+nx; i++) {
1527       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1528       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1529       for (j=ys; j<ys+ny; j++) {
1530         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1531         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1532         for (k=zs; k<zs+nz; k++) {
1533           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1534           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1535 
1536           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1537 
1538           cnt = 0;
1539           for (l=0; l<nc; l++) {
1540             for (ii=istart; ii<iend+1; ii++) {
1541               for (jj=jstart; jj<jend+1; jj++) {
1542                 for (kk=kstart; kk<kend+1; kk++) {
1543                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1544                     cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1545                   }
1546                 }
1547               }
1548             }
1549             rows[l] = l + nc*(slot);
1550           }
1551           ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1552         }
1553       }
1554     }
1555     ierr = PetscFree(values);CHKERRQ(ierr);
1556     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1557     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1558     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1559   }
1560   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1561   PetscFunctionReturn(0);
1562 }
1563 
1564 /* ---------------------------------------------------------------------------------*/
1565 
1566 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J)
1567 {
1568   PetscErrorCode         ierr;
1569   DM_DA                  *dd = (DM_DA*)da->data;
1570   PetscInt               xs,nx,i,j,gxs,gnx,row,k,l;
1571   PetscInt               m,dim,s,*cols = NULL,nc,cnt,maxcnt = 0,*ocols;
1572   PetscInt               *ofill = dd->ofill,*dfill = dd->dfill;
1573   PetscScalar            *values;
1574   DMBoundaryType         bx;
1575   ISLocalToGlobalMapping ltog;
1576   PetscMPIInt            rank,size;
1577 
1578   PetscFunctionBegin;
1579   if (dd->bx == DM_BOUNDARY_PERIODIC) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"With fill provided not implemented with periodic boundary conditions");
1580   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)da),&rank);CHKERRQ(ierr);
1581   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRQ(ierr);
1582 
1583   /*
1584          nc - number of components per grid point
1585 
1586   */
1587   ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
1588   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
1589   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
1590 
1591   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1592   ierr = PetscCalloc2(nx*nc,&cols,nx*nc,&ocols);CHKERRQ(ierr);
1593 
1594   /*
1595         note should be smaller for first and last process with no periodic
1596         does not handle dfill
1597   */
1598   cnt = 0;
1599   /* coupling with process to the left */
1600   for (i=0; i<s; i++) {
1601     for (j=0; j<nc; j++) {
1602       ocols[cnt] = ((!rank) ? 0 : (s - i)*(ofill[j+1] - ofill[j]));
1603       cols[cnt]  = dfill[j+1] - dfill[j] + (s + i)*(ofill[j+1] - ofill[j]);
1604       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1605       cnt++;
1606     }
1607   }
1608   for (i=s; i<nx-s; i++) {
1609     for (j=0; j<nc; j++) {
1610       cols[cnt] = dfill[j+1] - dfill[j] + 2*s*(ofill[j+1] - ofill[j]);
1611       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1612       cnt++;
1613     }
1614   }
1615   /* coupling with process to the right */
1616   for (i=nx-s; i<nx; i++) {
1617     for (j=0; j<nc; j++) {
1618       ocols[cnt] = ((rank == (size-1)) ? 0 : (i - nx + s + 1)*(ofill[j+1] - ofill[j]));
1619       cols[cnt]  = dfill[j+1] - dfill[j] + (s + nx - i - 1)*(ofill[j+1] - ofill[j]);
1620       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1621       cnt++;
1622     }
1623   }
1624 
1625   ierr = MatSeqAIJSetPreallocation(J,0,cols);CHKERRQ(ierr);
1626   ierr = MatMPIAIJSetPreallocation(J,0,cols,0,ocols);CHKERRQ(ierr);
1627   ierr = PetscFree2(cols,ocols);CHKERRQ(ierr);
1628 
1629   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1630   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1631 
1632   /*
1633     For each node in the grid: we get the neighbors in the local (on processor ordering
1634     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1635     PETSc ordering.
1636   */
1637   if (!da->prealloc_only) {
1638     ierr = PetscCalloc2(maxcnt,&values,maxcnt,&cols);CHKERRQ(ierr);
1639 
1640     row = xs*nc;
1641     /* coupling with process to the left */
1642     for (i=xs; i<xs+s; i++) {
1643       for (j=0; j<nc; j++) {
1644         cnt = 0;
1645         if (rank) {
1646           for (l=0; l<s; l++) {
1647             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1648           }
1649         }
1650         if (dfill) {
1651           for (k=dfill[j]; k<dfill[j+1]; k++) {
1652             cols[cnt++] = i*nc + dfill[k];
1653           }
1654         } else {
1655           for (k=0; k<nc; k++) {
1656             cols[cnt++] = i*nc + k;
1657           }
1658         }
1659         for (l=0; l<s; l++) {
1660           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1661         }
1662         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1663         row++;
1664       }
1665     }
1666     for (i=xs+s; i<xs+nx-s; i++) {
1667       for (j=0; j<nc; j++) {
1668         cnt = 0;
1669         for (l=0; l<s; l++) {
1670           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1671         }
1672         if (dfill) {
1673           for (k=dfill[j]; k<dfill[j+1]; k++) {
1674             cols[cnt++] = i*nc + dfill[k];
1675           }
1676         } else {
1677           for (k=0; k<nc; k++) {
1678             cols[cnt++] = i*nc + k;
1679           }
1680         }
1681         for (l=0; l<s; l++) {
1682           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1683         }
1684         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1685         row++;
1686       }
1687     }
1688     /* coupling with process to the right */
1689     for (i=xs+nx-s; i<xs+nx; i++) {
1690       for (j=0; j<nc; j++) {
1691         cnt = 0;
1692         for (l=0; l<s; l++) {
1693           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1694         }
1695         if (dfill) {
1696           for (k=dfill[j]; k<dfill[j+1]; k++) {
1697             cols[cnt++] = i*nc + dfill[k];
1698           }
1699         } else {
1700           for (k=0; k<nc; k++) {
1701             cols[cnt++] = i*nc + k;
1702           }
1703         }
1704         if (rank < size-1) {
1705           for (l=0; l<s; l++) {
1706             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1707           }
1708         }
1709         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1710         row++;
1711       }
1712     }
1713     ierr = PetscFree2(values,cols);CHKERRQ(ierr);
1714     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1715     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1716     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1717   }
1718   PetscFunctionReturn(0);
1719 }
1720 
1721 /* ---------------------------------------------------------------------------------*/
1722 
1723 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J)
1724 {
1725   PetscErrorCode         ierr;
1726   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1727   PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
1728   PetscInt               istart,iend;
1729   PetscScalar            *values;
1730   DMBoundaryType         bx;
1731   ISLocalToGlobalMapping ltog,mltog;
1732 
1733   PetscFunctionBegin;
1734   /*
1735          nc - number of components per grid point
1736          col - number of colors needed in one direction for single component problem
1737 
1738   */
1739   ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
1740   col  = 2*s + 1;
1741 
1742   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
1743   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
1744 
1745   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1746   ierr = MatSeqAIJSetPreallocation(J,col*nc,0);CHKERRQ(ierr);
1747   ierr = MatMPIAIJSetPreallocation(J,col*nc,0,col*nc,0);CHKERRQ(ierr);
1748 
1749   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1750   ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
1751   if (!mltog) {
1752     ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1753   }
1754 
1755   /*
1756     For each node in the grid: we get the neighbors in the local (on processor ordering
1757     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1758     PETSc ordering.
1759   */
1760   if (!da->prealloc_only) {
1761     ierr = PetscMalloc2(nc,&rows,col*nc*nc,&cols);CHKERRQ(ierr);
1762     ierr = PetscCalloc1(col*nc*nc,&values);CHKERRQ(ierr);
1763     for (i=xs; i<xs+nx; i++) {
1764       istart = PetscMax(-s,gxs - i);
1765       iend   = PetscMin(s,gxs + gnx - i - 1);
1766       slot   = i - gxs;
1767 
1768       cnt = 0;
1769       for (l=0; l<nc; l++) {
1770         for (i1=istart; i1<iend+1; i1++) {
1771           cols[cnt++] = l + nc*(slot + i1);
1772         }
1773         rows[l] = l + nc*(slot);
1774       }
1775       ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1776     }
1777     ierr = PetscFree(values);CHKERRQ(ierr);
1778     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1779     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1780     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1781     ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1782   }
1783   PetscFunctionReturn(0);
1784 }
1785 
1786 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
1787 {
1788   PetscErrorCode         ierr;
1789   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1790   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1791   PetscInt               istart,iend,jstart,jend,ii,jj;
1792   MPI_Comm               comm;
1793   PetscScalar            *values;
1794   DMBoundaryType         bx,by;
1795   DMDAStencilType        st;
1796   ISLocalToGlobalMapping ltog;
1797 
1798   PetscFunctionBegin;
1799   /*
1800      nc - number of components per grid point
1801      col - number of colors needed in one direction for single component problem
1802   */
1803   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1804   col  = 2*s + 1;
1805 
1806   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1807   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1808   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1809 
1810   ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr);
1811 
1812   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1813 
1814   /* determine the matrix preallocation information */
1815   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1816   for (i=xs; i<xs+nx; i++) {
1817     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1818     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1819     for (j=ys; j<ys+ny; j++) {
1820       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1821       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1822       slot   = i - gxs + gnx*(j - gys);
1823 
1824       /* Find block columns in block row */
1825       cnt = 0;
1826       for (ii=istart; ii<iend+1; ii++) {
1827         for (jj=jstart; jj<jend+1; jj++) {
1828           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1829             cols[cnt++] = slot + ii + gnx*jj;
1830           }
1831         }
1832       }
1833       ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1834     }
1835   }
1836   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1837   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1838   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1839 
1840   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1841 
1842   /*
1843     For each node in the grid: we get the neighbors in the local (on processor ordering
1844     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1845     PETSc ordering.
1846   */
1847   if (!da->prealloc_only) {
1848     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
1849     for (i=xs; i<xs+nx; i++) {
1850       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1851       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1852       for (j=ys; j<ys+ny; j++) {
1853         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1854         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1855         slot = i - gxs + gnx*(j - gys);
1856         cnt  = 0;
1857         for (ii=istart; ii<iend+1; ii++) {
1858           for (jj=jstart; jj<jend+1; jj++) {
1859             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1860               cols[cnt++] = slot + ii + gnx*jj;
1861             }
1862           }
1863         }
1864         ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1865       }
1866     }
1867     ierr = PetscFree(values);CHKERRQ(ierr);
1868     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1869     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1870     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1871   }
1872   ierr = PetscFree(cols);CHKERRQ(ierr);
1873   PetscFunctionReturn(0);
1874 }
1875 
1876 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
1877 {
1878   PetscErrorCode         ierr;
1879   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1880   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1881   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1882   MPI_Comm               comm;
1883   PetscScalar            *values;
1884   DMBoundaryType         bx,by,bz;
1885   DMDAStencilType        st;
1886   ISLocalToGlobalMapping ltog;
1887 
1888   PetscFunctionBegin;
1889   /*
1890          nc - number of components per grid point
1891          col - number of colors needed in one direction for single component problem
1892 
1893   */
1894   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1895   col  = 2*s + 1;
1896 
1897   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1898   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1899   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1900 
1901   ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr);
1902 
1903   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1904 
1905   /* determine the matrix preallocation information */
1906   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1907   for (i=xs; i<xs+nx; i++) {
1908     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1909     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1910     for (j=ys; j<ys+ny; j++) {
1911       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1912       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1913       for (k=zs; k<zs+nz; k++) {
1914         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1915         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1916 
1917         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1918 
1919         /* Find block columns in block row */
1920         cnt = 0;
1921         for (ii=istart; ii<iend+1; ii++) {
1922           for (jj=jstart; jj<jend+1; jj++) {
1923             for (kk=kstart; kk<kend+1; kk++) {
1924               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1925                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1926               }
1927             }
1928           }
1929         }
1930         ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1931       }
1932     }
1933   }
1934   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1935   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1936   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1937 
1938   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1939 
1940   /*
1941     For each node in the grid: we get the neighbors in the local (on processor ordering
1942     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1943     PETSc ordering.
1944   */
1945   if (!da->prealloc_only) {
1946     ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr);
1947     for (i=xs; i<xs+nx; i++) {
1948       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1949       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1950       for (j=ys; j<ys+ny; j++) {
1951         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1952         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1953         for (k=zs; k<zs+nz; k++) {
1954           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1955           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1956 
1957           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1958 
1959           cnt = 0;
1960           for (ii=istart; ii<iend+1; ii++) {
1961             for (jj=jstart; jj<jend+1; jj++) {
1962               for (kk=kstart; kk<kend+1; kk++) {
1963                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1964                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1965                 }
1966               }
1967             }
1968           }
1969           ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1970         }
1971       }
1972     }
1973     ierr = PetscFree(values);CHKERRQ(ierr);
1974     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1975     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1976     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1977   }
1978   ierr = PetscFree(cols);CHKERRQ(ierr);
1979   PetscFunctionReturn(0);
1980 }
1981 
1982 /*
1983   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
1984   identify in the local ordering with periodic domain.
1985 */
1986 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[])
1987 {
1988   PetscErrorCode ierr;
1989   PetscInt       i,n;
1990 
1991   PetscFunctionBegin;
1992   ierr = ISLocalToGlobalMappingApplyBlock(ltog,1,row,row);CHKERRQ(ierr);
1993   ierr = ISLocalToGlobalMappingApplyBlock(ltog,*cnt,col,col);CHKERRQ(ierr);
1994   for (i=0,n=0; i<*cnt; i++) {
1995     if (col[i] >= *row) col[n++] = col[i];
1996   }
1997   *cnt = n;
1998   PetscFunctionReturn(0);
1999 }
2000 
2001 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J)
2002 {
2003   PetscErrorCode         ierr;
2004   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2005   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
2006   PetscInt               istart,iend,jstart,jend,ii,jj;
2007   MPI_Comm               comm;
2008   PetscScalar            *values;
2009   DMBoundaryType         bx,by;
2010   DMDAStencilType        st;
2011   ISLocalToGlobalMapping ltog;
2012 
2013   PetscFunctionBegin;
2014   /*
2015      nc - number of components per grid point
2016      col - number of colors needed in one direction for single component problem
2017   */
2018   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
2019   col  = 2*s + 1;
2020 
2021   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
2022   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
2023   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
2024 
2025   ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr);
2026 
2027   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
2028 
2029   /* determine the matrix preallocation information */
2030   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
2031   for (i=xs; i<xs+nx; i++) {
2032     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2033     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2034     for (j=ys; j<ys+ny; j++) {
2035       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2036       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2037       slot   = i - gxs + gnx*(j - gys);
2038 
2039       /* Find block columns in block row */
2040       cnt = 0;
2041       for (ii=istart; ii<iend+1; ii++) {
2042         for (jj=jstart; jj<jend+1; jj++) {
2043           if (st == DMDA_STENCIL_BOX || !ii || !jj) {
2044             cols[cnt++] = slot + ii + gnx*jj;
2045           }
2046         }
2047       }
2048       ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2049       ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
2050     }
2051   }
2052   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
2053   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
2054   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2055 
2056   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
2057 
2058   /*
2059     For each node in the grid: we get the neighbors in the local (on processor ordering
2060     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2061     PETSc ordering.
2062   */
2063   if (!da->prealloc_only) {
2064     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
2065     for (i=xs; i<xs+nx; i++) {
2066       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2067       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2068       for (j=ys; j<ys+ny; j++) {
2069         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2070         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2071         slot   = i - gxs + gnx*(j - gys);
2072 
2073         /* Find block columns in block row */
2074         cnt = 0;
2075         for (ii=istart; ii<iend+1; ii++) {
2076           for (jj=jstart; jj<jend+1; jj++) {
2077             if (st == DMDA_STENCIL_BOX || !ii || !jj) {
2078               cols[cnt++] = slot + ii + gnx*jj;
2079             }
2080           }
2081         }
2082         ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2083         ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2084       }
2085     }
2086     ierr = PetscFree(values);CHKERRQ(ierr);
2087     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2088     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2089     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2090   }
2091   ierr = PetscFree(cols);CHKERRQ(ierr);
2092   PetscFunctionReturn(0);
2093 }
2094 
2095 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J)
2096 {
2097   PetscErrorCode         ierr;
2098   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2099   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
2100   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
2101   MPI_Comm               comm;
2102   PetscScalar            *values;
2103   DMBoundaryType         bx,by,bz;
2104   DMDAStencilType        st;
2105   ISLocalToGlobalMapping ltog;
2106 
2107   PetscFunctionBegin;
2108   /*
2109      nc - number of components per grid point
2110      col - number of colors needed in one direction for single component problem
2111   */
2112   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
2113   col  = 2*s + 1;
2114 
2115   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
2116   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
2117   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
2118 
2119   /* create the matrix */
2120   ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr);
2121 
2122   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
2123 
2124   /* determine the matrix preallocation information */
2125   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
2126   for (i=xs; i<xs+nx; i++) {
2127     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2128     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2129     for (j=ys; j<ys+ny; j++) {
2130       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2131       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2132       for (k=zs; k<zs+nz; k++) {
2133         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2134         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2135 
2136         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2137 
2138         /* Find block columns in block row */
2139         cnt = 0;
2140         for (ii=istart; ii<iend+1; ii++) {
2141           for (jj=jstart; jj<jend+1; jj++) {
2142             for (kk=kstart; kk<kend+1; kk++) {
2143               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
2144                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
2145               }
2146             }
2147           }
2148         }
2149         ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2150         ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
2151       }
2152     }
2153   }
2154   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
2155   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
2156   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2157 
2158   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
2159 
2160   /*
2161     For each node in the grid: we get the neighbors in the local (on processor ordering
2162     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2163     PETSc ordering.
2164   */
2165   if (!da->prealloc_only) {
2166     ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr);
2167     for (i=xs; i<xs+nx; i++) {
2168       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2169       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2170       for (j=ys; j<ys+ny; j++) {
2171         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2172         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2173         for (k=zs; k<zs+nz; k++) {
2174           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2175           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2176 
2177           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2178 
2179           cnt = 0;
2180           for (ii=istart; ii<iend+1; ii++) {
2181             for (jj=jstart; jj<jend+1; jj++) {
2182               for (kk=kstart; kk<kend+1; kk++) {
2183                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
2184                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
2185                 }
2186               }
2187             }
2188           }
2189           ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2190           ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2191         }
2192       }
2193     }
2194     ierr = PetscFree(values);CHKERRQ(ierr);
2195     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2196     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2197     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2198   }
2199   ierr = PetscFree(cols);CHKERRQ(ierr);
2200   PetscFunctionReturn(0);
2201 }
2202 
2203 /* ---------------------------------------------------------------------------------*/
2204 
2205 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J)
2206 {
2207   PetscErrorCode         ierr;
2208   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2209   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt, maxcnt = 0,l,p,*dnz,*onz;
2210   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
2211   DM_DA                  *dd = (DM_DA*)da->data;
2212   PetscInt               ifill_col,*dfill = dd->dfill,*ofill = dd->ofill;
2213   MPI_Comm               comm;
2214   PetscScalar            *values;
2215   DMBoundaryType         bx,by,bz;
2216   ISLocalToGlobalMapping ltog;
2217   DMDAStencilType        st;
2218   PetscBool              removedups = PETSC_FALSE;
2219 
2220   PetscFunctionBegin;
2221   /*
2222          nc - number of components per grid point
2223          col - number of colors needed in one direction for single component problem
2224 
2225   */
2226   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
2227   col  = 2*s + 1;
2228   if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
2229                  by 2*stencil_width + 1\n");
2230   if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
2231                  by 2*stencil_width + 1\n");
2232   if (bz == DM_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
2233                  by 2*stencil_width + 1\n");
2234 
2235   /*
2236        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
2237        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
2238   */
2239   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
2240   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
2241   if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;
2242 
2243   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
2244   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
2245   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
2246 
2247   ierr = PetscMalloc1(col*col*col*nc,&cols);CHKERRQ(ierr);
2248   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
2249 
2250   /* determine the matrix preallocation information */
2251   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
2252 
2253   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
2254   for (i=xs; i<xs+nx; i++) {
2255     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2256     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2257     for (j=ys; j<ys+ny; j++) {
2258       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2259       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2260       for (k=zs; k<zs+nz; k++) {
2261         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2262         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2263 
2264         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2265 
2266         for (l=0; l<nc; l++) {
2267           cnt = 0;
2268           for (ii=istart; ii<iend+1; ii++) {
2269             for (jj=jstart; jj<jend+1; jj++) {
2270               for (kk=kstart; kk<kend+1; kk++) {
2271                 if (ii || jj || kk) {
2272                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2273                     for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2274                   }
2275                 } else {
2276                   if (dfill) {
2277                     for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2278                   } else {
2279                     for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2280                   }
2281                 }
2282               }
2283             }
2284           }
2285           row  = l + nc*(slot);
2286           maxcnt = PetscMax(maxcnt,cnt);
2287           if (removedups) {
2288             ierr = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
2289           } else {
2290             ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
2291           }
2292         }
2293       }
2294     }
2295   }
2296   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
2297   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
2298   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2299   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
2300 
2301   /*
2302     For each node in the grid: we get the neighbors in the local (on processor ordering
2303     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2304     PETSc ordering.
2305   */
2306   if (!da->prealloc_only) {
2307     ierr = PetscCalloc1(maxcnt,&values);CHKERRQ(ierr);
2308     for (i=xs; i<xs+nx; i++) {
2309       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2310       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2311       for (j=ys; j<ys+ny; j++) {
2312         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2313         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2314         for (k=zs; k<zs+nz; k++) {
2315           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2316           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2317 
2318           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2319 
2320           for (l=0; l<nc; l++) {
2321             cnt = 0;
2322             for (ii=istart; ii<iend+1; ii++) {
2323               for (jj=jstart; jj<jend+1; jj++) {
2324                 for (kk=kstart; kk<kend+1; kk++) {
2325                   if (ii || jj || kk) {
2326                     if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2327                       for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2328                     }
2329                   } else {
2330                     if (dfill) {
2331                       for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2332                     } else {
2333                       for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2334                     }
2335                   }
2336                 }
2337               }
2338             }
2339             row  = l + nc*(slot);
2340             ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2341           }
2342         }
2343       }
2344     }
2345     ierr = PetscFree(values);CHKERRQ(ierr);
2346     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2347     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2348     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2349   }
2350   ierr = PetscFree(cols);CHKERRQ(ierr);
2351   PetscFunctionReturn(0);
2352 }
2353