xref: /petsc/src/dm/impls/da/fdda.c (revision 87050cfd2c0579ea516b8fcfef1cf8502bc01bdc)
1 
2 #include <petsc/private/dmdaimpl.h> /*I      "petscdmda.h"     I*/
3 #include <petscmat.h>
4 
5 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring*);
6 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring*);
7 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring*);
8 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring*);
9 
10 /*
11    For ghost i that may be negative or greater than the upper bound this
12   maps it into the 0:m-1 range using periodicity
13 */
14 #define SetInRange(i,m) ((i < 0) ? m+i : ((i >= m) ? i-m : i))
15 
16 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill,PetscInt w,PetscInt **rfill)
17 {
18   PetscErrorCode ierr;
19   PetscInt       i,j,nz,*fill;
20 
21   PetscFunctionBegin;
22   if (!dfill) PetscFunctionReturn(0);
23 
24   /* count number nonzeros */
25   nz = 0;
26   for (i=0; i<w; i++) {
27     for (j=0; j<w; j++) {
28       if (dfill[w*i+j]) nz++;
29     }
30   }
31   ierr = PetscMalloc1(nz + w + 1,&fill);CHKERRQ(ierr);
32   /* construct modified CSR storage of nonzero structure */
33   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
34    so fill[1] - fill[0] gives number of nonzeros in first row etc */
35   nz = w + 1;
36   for (i=0; i<w; i++) {
37     fill[i] = nz;
38     for (j=0; j<w; j++) {
39       if (dfill[w*i+j]) {
40         fill[nz] = j;
41         nz++;
42       }
43     }
44   }
45   fill[w] = nz;
46 
47   *rfill = fill;
48   PetscFunctionReturn(0);
49 }
50 
51 
52 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse,PetscInt w,PetscInt **rfill)
53 {
54   PetscErrorCode ierr;
55   PetscInt       nz;
56 
57   PetscFunctionBegin;
58   if (!dfillsparse) PetscFunctionReturn(0);
59 
60   /* Determine number of non-zeros */
61   nz = (dfillsparse[w] - w - 1);
62 
63   /* Allocate space for our copy of the given sparse matrix representation. */
64   ierr = PetscMalloc1(nz + w + 1,rfill);CHKERRQ(ierr);
65   ierr = PetscMemcpy(*rfill,dfillsparse,(nz+w+1)*sizeof(PetscInt));
66   PetscFunctionReturn(0);
67 }
68 
69 
70 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
71 {
72   PetscErrorCode ierr;
73   PetscInt       i,k,cnt = 1;
74 
75   PetscFunctionBegin;
76 
77   /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of
78    columns to the left with any nonzeros in them plus 1 */
79   ierr = PetscCalloc1(dd->w,&dd->ofillcols);CHKERRQ(ierr);
80   for (i=0; i<dd->w; i++) {
81     for (k=dd->ofill[i]; k<dd->ofill[i+1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
82   }
83   for (i=0; i<dd->w; i++) {
84     if (dd->ofillcols[i]) {
85       dd->ofillcols[i] = cnt++;
86     }
87   }
88   PetscFunctionReturn(0);
89 }
90 
91 
92 
93 /*@
94     DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
95     of the matrix returned by DMCreateMatrix().
96 
97     Logically Collective on DMDA
98 
99     Input Parameter:
100 +   da - the distributed array
101 .   dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
102 -   ofill - the fill pattern in the off-diagonal blocks
103 
104 
105     Level: developer
106 
107     Notes:
108     This only makes sense when you are doing multicomponent problems but using the
109        MPIAIJ matrix format
110 
111            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
112        representing coupling and 0 entries for missing coupling. For example
113 $             dfill[9] = {1, 0, 0,
114 $                         1, 1, 0,
115 $                         0, 1, 1}
116        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
117        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
118        diagonal block).
119 
120      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
121      can be represented in the dfill, ofill format
122 
123    Contributed by Glenn Hammond
124 
125 .seealso DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()
126 
127 @*/
128 PetscErrorCode  DMDASetBlockFills(DM da,const PetscInt *dfill,const PetscInt *ofill)
129 {
130   DM_DA          *dd = (DM_DA*)da->data;
131   PetscErrorCode ierr;
132 
133   PetscFunctionBegin;
134   /* save the given dfill and ofill information */
135   ierr = DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);CHKERRQ(ierr);
136   ierr = DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);CHKERRQ(ierr);
137 
138   /* count nonzeros in ofill columns */
139   ierr = DMDASetBlockFills_Private2(dd);CHKERRQ(ierr);
140 
141   PetscFunctionReturn(0);
142 }
143 
144 
145 /*@
146     DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
147     of the matrix returned by DMCreateMatrix(), using sparse representations
148     of fill patterns.
149 
150     Logically Collective on DMDA
151 
152     Input Parameter:
153 +   da - the distributed array
154 .   dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block)
155 -   ofill - the sparse fill pattern in the off-diagonal blocks
156 
157 
158     Level: developer
159 
160     Notes: This only makes sense when you are doing multicomponent problems but using the
161        MPIAIJ matrix format
162 
163            The format for dfill and ofill is a sparse representation of a
164            dof-by-dof matrix with 1 entries representing coupling and 0 entries
165            for missing coupling.  The sparse representation is a 1 dimensional
166            array of length nz + dof + 1, where nz is the number of non-zeros in
167            the matrix.  The first dof entries in the array give the
168            starting array indices of each row's items in the rest of the array,
169            the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
170            and the remaining nz items give the column indices of each of
171            the 1s within the logical 2D matrix.  Each row's items within
172            the array are the column indices of the 1s within that row
173            of the 2D matrix.  PETSc developers may recognize that this is the
174            same format as that computed by the DMDASetBlockFills_Private()
175            function from a dense 2D matrix representation.
176 
177      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
178      can be represented in the dfill, ofill format
179 
180    Contributed by Philip C. Roth
181 
182 .seealso DMDASetBlockFills(), DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()
183 
184 @*/
185 PetscErrorCode  DMDASetBlockFillsSparse(DM da,const PetscInt *dfillsparse,const PetscInt *ofillsparse)
186 {
187   DM_DA          *dd = (DM_DA*)da->data;
188   PetscErrorCode ierr;
189 
190   PetscFunctionBegin;
191   /* save the given dfill and ofill information */
192   ierr = DMDASetBlockFillsSparse_Private(dfillsparse,dd->w,&dd->dfill);CHKERRQ(ierr);
193   ierr = DMDASetBlockFillsSparse_Private(ofillsparse,dd->w,&dd->ofill);CHKERRQ(ierr);
194 
195   /* count nonzeros in ofill columns */
196   ierr = DMDASetBlockFills_Private2(dd);CHKERRQ(ierr);
197 
198   PetscFunctionReturn(0);
199 }
200 
201 
202 PetscErrorCode  DMCreateColoring_DA(DM da,ISColoringType ctype,ISColoring *coloring)
203 {
204   PetscErrorCode   ierr;
205   PetscInt         dim,m,n,p,nc;
206   DMBoundaryType bx,by,bz;
207   MPI_Comm         comm;
208   PetscMPIInt      size;
209   PetscBool        isBAIJ;
210   DM_DA            *dd = (DM_DA*)da->data;
211 
212   PetscFunctionBegin;
213   /*
214                                   m
215           ------------------------------------------------------
216          |                                                     |
217          |                                                     |
218          |               ----------------------                |
219          |               |                    |                |
220       n  |           yn  |                    |                |
221          |               |                    |                |
222          |               .---------------------                |
223          |             (xs,ys)     xn                          |
224          |            .                                        |
225          |         (gxs,gys)                                   |
226          |                                                     |
227           -----------------------------------------------------
228   */
229 
230   /*
231          nc - number of components per grid point
232          col - number of colors needed in one direction for single component problem
233 
234   */
235   ierr = DMDAGetInfo(da,&dim,0,0,0,&m,&n,&p,&nc,0,&bx,&by,&bz,0);CHKERRQ(ierr);
236 
237   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
238   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
239   if (ctype == IS_COLORING_LOCAL) {
240     if (size == 1) {
241       ctype = IS_COLORING_GLOBAL;
242     } else if (dim > 1) {
243       if ((m==1 && bx == DM_BOUNDARY_PERIODIC) || (n==1 && by == DM_BOUNDARY_PERIODIC) || (p==1 && bz == DM_BOUNDARY_PERIODIC)) {
244         SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain  on the same process");
245       }
246     }
247   }
248 
249   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
250      matrices is for the blocks, not the individual matrix elements  */
251   ierr = PetscStrcmp(da->mattype,MATBAIJ,&isBAIJ);CHKERRQ(ierr);
252   if (!isBAIJ) {ierr = PetscStrcmp(da->mattype,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);}
253   if (!isBAIJ) {ierr = PetscStrcmp(da->mattype,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);}
254   if (isBAIJ) {
255     dd->w  = 1;
256     dd->xs = dd->xs/nc;
257     dd->xe = dd->xe/nc;
258     dd->Xs = dd->Xs/nc;
259     dd->Xe = dd->Xe/nc;
260   }
261 
262   /*
263      We do not provide a getcoloring function in the DMDA operations because
264    the basic DMDA does not know about matrices. We think of DMDA as being more
265    more low-level then matrices.
266   */
267   if (dim == 1) {
268     ierr = DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
269   } else if (dim == 2) {
270     ierr =  DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
271   } else if (dim == 3) {
272     ierr =  DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
273   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
274   if (isBAIJ) {
275     dd->w  = nc;
276     dd->xs = dd->xs*nc;
277     dd->xe = dd->xe*nc;
278     dd->Xs = dd->Xs*nc;
279     dd->Xe = dd->Xe*nc;
280   }
281   PetscFunctionReturn(0);
282 }
283 
284 /* ---------------------------------------------------------------------------------*/
285 
286 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
287 {
288   PetscErrorCode   ierr;
289   PetscInt         xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
290   PetscInt         ncolors;
291   MPI_Comm         comm;
292   DMBoundaryType bx,by;
293   DMDAStencilType  st;
294   ISColoringValue  *colors;
295   DM_DA            *dd = (DM_DA*)da->data;
296 
297   PetscFunctionBegin;
298   /*
299          nc - number of components per grid point
300          col - number of colors needed in one direction for single component problem
301 
302   */
303   ierr = DMDAGetInfo(da,&dim,&m,&n,0,&M,&N,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
304   col  = 2*s + 1;
305   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
306   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
307   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
308 
309   /* special case as taught to us by Paul Hovland */
310   if (st == DMDA_STENCIL_STAR && s == 1) {
311     ierr = DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
312   } else {
313 
314     if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X (%d) is divisible\n\
315                                                             by 2*stencil_width + 1 (%d)\n", m, col);
316     if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y (%d) is divisible\n\
317                                                             by 2*stencil_width + 1 (%d)\n", n, col);
318     if (ctype == IS_COLORING_GLOBAL) {
319       if (!dd->localcoloring) {
320         ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr);
321         ii   = 0;
322         for (j=ys; j<ys+ny; j++) {
323           for (i=xs; i<xs+nx; i++) {
324             for (k=0; k<nc; k++) {
325               colors[ii++] = k + nc*((i % col) + col*(j % col));
326             }
327           }
328         }
329         ncolors = nc + nc*(col-1 + col*(col-1));
330         ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
331       }
332       *coloring = dd->localcoloring;
333     } else if (ctype == IS_COLORING_LOCAL) {
334       if (!dd->ghostedcoloring) {
335         ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr);
336         ii   = 0;
337         for (j=gys; j<gys+gny; j++) {
338           for (i=gxs; i<gxs+gnx; i++) {
339             for (k=0; k<nc; k++) {
340               /* the complicated stuff is to handle periodic boundaries */
341               colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
342             }
343           }
344         }
345         ncolors = nc + nc*(col - 1 + col*(col-1));
346         ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
347         /* PetscIntView(ncolors,(PetscInt*)colors,0); */
348 
349         ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
350       }
351       *coloring = dd->ghostedcoloring;
352     } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
353   }
354   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
355   PetscFunctionReturn(0);
356 }
357 
358 /* ---------------------------------------------------------------------------------*/
359 
360 PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
361 {
362   PetscErrorCode   ierr;
363   PetscInt         xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
364   PetscInt         ncolors;
365   MPI_Comm         comm;
366   DMBoundaryType bx,by,bz;
367   DMDAStencilType  st;
368   ISColoringValue  *colors;
369   DM_DA            *dd = (DM_DA*)da->data;
370 
371   PetscFunctionBegin;
372   /*
373          nc - number of components per grid point
374          col - number of colors needed in one direction for single component problem
375 
376   */
377   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
378   col  = 2*s + 1;
379   if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
380                                                          by 2*stencil_width + 1\n");
381   if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
382                                                          by 2*stencil_width + 1\n");
383   if (bz == DM_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
384                                                          by 2*stencil_width + 1\n");
385 
386   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
387   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
388   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
389 
390   /* create the coloring */
391   if (ctype == IS_COLORING_GLOBAL) {
392     if (!dd->localcoloring) {
393       ierr = PetscMalloc1(nc*nx*ny*nz,&colors);CHKERRQ(ierr);
394       ii   = 0;
395       for (k=zs; k<zs+nz; k++) {
396         for (j=ys; j<ys+ny; j++) {
397           for (i=xs; i<xs+nx; i++) {
398             for (l=0; l<nc; l++) {
399               colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
400             }
401           }
402         }
403       }
404       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
405       ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
406     }
407     *coloring = dd->localcoloring;
408   } else if (ctype == IS_COLORING_LOCAL) {
409     if (!dd->ghostedcoloring) {
410       ierr = PetscMalloc1(nc*gnx*gny*gnz,&colors);CHKERRQ(ierr);
411       ii   = 0;
412       for (k=gzs; k<gzs+gnz; k++) {
413         for (j=gys; j<gys+gny; j++) {
414           for (i=gxs; i<gxs+gnx; i++) {
415             for (l=0; l<nc; l++) {
416               /* the complicated stuff is to handle periodic boundaries */
417               colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
418             }
419           }
420         }
421       }
422       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
423       ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
424       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
425     }
426     *coloring = dd->ghostedcoloring;
427   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
428   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
429   PetscFunctionReturn(0);
430 }
431 
432 /* ---------------------------------------------------------------------------------*/
433 
434 PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
435 {
436   PetscErrorCode   ierr;
437   PetscInt         xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
438   PetscInt         ncolors;
439   MPI_Comm         comm;
440   DMBoundaryType bx;
441   ISColoringValue  *colors;
442   DM_DA            *dd = (DM_DA*)da->data;
443 
444   PetscFunctionBegin;
445   /*
446          nc - number of components per grid point
447          col - number of colors needed in one direction for single component problem
448 
449   */
450   ierr = DMDAGetInfo(da,&dim,&m,0,0,&M,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
451   col  = 2*s + 1;
452 
453   if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points %d is divisible\n\
454                                                           by 2*stencil_width + 1 %d\n",(int)m,(int)col);
455 
456   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
457   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
458   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
459 
460   /* create the coloring */
461   if (ctype == IS_COLORING_GLOBAL) {
462     if (!dd->localcoloring) {
463       ierr = PetscMalloc1(nc*nx,&colors);CHKERRQ(ierr);
464       if (dd->ofillcols) {
465         PetscInt tc = 0;
466         for (i=0; i<nc; i++) tc += (PetscInt) (dd->ofillcols[i] > 0);
467         i1 = 0;
468         for (i=xs; i<xs+nx; i++) {
469           for (l=0; l<nc; l++) {
470             if (dd->ofillcols[l] && (i % col)) {
471               colors[i1++] =  nc - 1 + tc*((i % col) - 1) + dd->ofillcols[l];
472             } else {
473               colors[i1++] = l;
474             }
475           }
476         }
477         ncolors = nc + 2*s*tc;
478       } else {
479         i1 = 0;
480         for (i=xs; i<xs+nx; i++) {
481           for (l=0; l<nc; l++) {
482             colors[i1++] = l + nc*(i % col);
483           }
484         }
485         ncolors = nc + nc*(col-1);
486       }
487       ierr = ISColoringCreate(comm,ncolors,nc*nx,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
488     }
489     *coloring = dd->localcoloring;
490   } else if (ctype == IS_COLORING_LOCAL) {
491     if (!dd->ghostedcoloring) {
492       ierr = PetscMalloc1(nc*gnx,&colors);CHKERRQ(ierr);
493       i1   = 0;
494       for (i=gxs; i<gxs+gnx; i++) {
495         for (l=0; l<nc; l++) {
496           /* the complicated stuff is to handle periodic boundaries */
497           colors[i1++] = l + nc*(SetInRange(i,m) % col);
498         }
499       }
500       ncolors = nc + nc*(col-1);
501       ierr    = ISColoringCreate(comm,ncolors,nc*gnx,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
502       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
503     }
504     *coloring = dd->ghostedcoloring;
505   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
506   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
507   PetscFunctionReturn(0);
508 }
509 
510 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
511 {
512   PetscErrorCode   ierr;
513   PetscInt         xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
514   PetscInt         ncolors;
515   MPI_Comm         comm;
516   DMBoundaryType bx,by;
517   ISColoringValue  *colors;
518   DM_DA            *dd = (DM_DA*)da->data;
519 
520   PetscFunctionBegin;
521   /*
522          nc - number of components per grid point
523          col - number of colors needed in one direction for single component problem
524 
525   */
526   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,0);CHKERRQ(ierr);
527   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
528   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
529   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
530 
531   if (bx == DM_BOUNDARY_PERIODIC && (m % 5)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible by 5\n");
532   if (by == DM_BOUNDARY_PERIODIC && (n % 5)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible by 5\n");
533 
534   /* create the coloring */
535   if (ctype == IS_COLORING_GLOBAL) {
536     if (!dd->localcoloring) {
537       ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr);
538       ii   = 0;
539       for (j=ys; j<ys+ny; j++) {
540         for (i=xs; i<xs+nx; i++) {
541           for (k=0; k<nc; k++) {
542             colors[ii++] = k + nc*((3*j+i) % 5);
543           }
544         }
545       }
546       ncolors = 5*nc;
547       ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
548     }
549     *coloring = dd->localcoloring;
550   } else if (ctype == IS_COLORING_LOCAL) {
551     if (!dd->ghostedcoloring) {
552       ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr);
553       ii = 0;
554       for (j=gys; j<gys+gny; j++) {
555         for (i=gxs; i<gxs+gnx; i++) {
556           for (k=0; k<nc; k++) {
557             colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
558           }
559         }
560       }
561       ncolors = 5*nc;
562       ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
563       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
564     }
565     *coloring = dd->ghostedcoloring;
566   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
567   PetscFunctionReturn(0);
568 }
569 
570 /* =========================================================================== */
571 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat);
572 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM,Mat);
573 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat);
574 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat);
575 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat);
576 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat);
577 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat);
578 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat);
579 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat);
580 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat);
581 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM,Mat);
582 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM,Mat);
583 extern PetscErrorCode DMCreateMatrix_DA_IS(DM,Mat);
584 
585 /*@C
586    MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix
587 
588    Logically Collective on Mat
589 
590    Input Parameters:
591 +  mat - the matrix
592 -  da - the da
593 
594    Level: intermediate
595 
596 @*/
597 PetscErrorCode MatSetupDM(Mat mat,DM da)
598 {
599   PetscErrorCode ierr;
600 
601   PetscFunctionBegin;
602   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
603   PetscValidHeaderSpecificType(da,DM_CLASSID,1,DMDA);
604   ierr = PetscTryMethod(mat,"MatSetupDM_C",(Mat,DM),(mat,da));CHKERRQ(ierr);
605   PetscFunctionReturn(0);
606 }
607 
608 PetscErrorCode  MatView_MPI_DA(Mat A,PetscViewer viewer)
609 {
610   DM                da;
611   PetscErrorCode    ierr;
612   const char        *prefix;
613   Mat               Anatural;
614   AO                ao;
615   PetscInt          rstart,rend,*petsc,i;
616   IS                is;
617   MPI_Comm          comm;
618   PetscViewerFormat format;
619 
620   PetscFunctionBegin;
621   /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
622   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
623   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0);
624 
625   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
626   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
627   if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
628 
629   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
630   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
631   ierr = PetscMalloc1(rend-rstart,&petsc);CHKERRQ(ierr);
632   for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
633   ierr = AOApplicationToPetsc(ao,rend-rstart,petsc);CHKERRQ(ierr);
634   ierr = ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
635 
636   /* call viewer on natural ordering */
637   ierr = MatCreateSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);CHKERRQ(ierr);
638   ierr = ISDestroy(&is);CHKERRQ(ierr);
639   ierr = PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);CHKERRQ(ierr);
640   ierr = PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);CHKERRQ(ierr);
641   ierr = PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);CHKERRQ(ierr);
642   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
643   ierr = MatView(Anatural,viewer);CHKERRQ(ierr);
644   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
645   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
646   PetscFunctionReturn(0);
647 }
648 
649 PetscErrorCode  MatLoad_MPI_DA(Mat A,PetscViewer viewer)
650 {
651   DM             da;
652   PetscErrorCode ierr;
653   Mat            Anatural,Aapp;
654   AO             ao;
655   PetscInt       rstart,rend,*app,i,m,n,M,N;
656   IS             is;
657   MPI_Comm       comm;
658 
659   PetscFunctionBegin;
660   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
661   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
662   if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
663 
664   /* Load the matrix in natural ordering */
665   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Anatural);CHKERRQ(ierr);
666   ierr = MatSetType(Anatural,((PetscObject)A)->type_name);CHKERRQ(ierr);
667   ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr);
668   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
669   ierr = MatSetSizes(Anatural,m,n,M,N);CHKERRQ(ierr);
670   ierr = MatLoad(Anatural,viewer);CHKERRQ(ierr);
671 
672   /* Map natural ordering to application ordering and create IS */
673   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
674   ierr = MatGetOwnershipRange(Anatural,&rstart,&rend);CHKERRQ(ierr);
675   ierr = PetscMalloc1(rend-rstart,&app);CHKERRQ(ierr);
676   for (i=rstart; i<rend; i++) app[i-rstart] = i;
677   ierr = AOPetscToApplication(ao,rend-rstart,app);CHKERRQ(ierr);
678   ierr = ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
679 
680   /* Do permutation and replace header */
681   ierr = MatCreateSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);CHKERRQ(ierr);
682   ierr = MatHeaderReplace(A,&Aapp);CHKERRQ(ierr);
683   ierr = ISDestroy(&is);CHKERRQ(ierr);
684   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
685   PetscFunctionReturn(0);
686 }
687 
688 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
689 {
690   PetscErrorCode ierr;
691   PetscInt       dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
692   Mat            A;
693   MPI_Comm       comm;
694   MatType        Atype;
695   PetscSection   section, sectionGlobal;
696   void           (*aij)(void)=NULL,(*baij)(void)=NULL,(*sbaij)(void)=NULL,(*sell)(void)=NULL,(*is)(void)=NULL;
697   MatType        mtype;
698   PetscMPIInt    size;
699   DM_DA          *dd = (DM_DA*)da->data;
700 
701   PetscFunctionBegin;
702   ierr = MatInitializePackage();CHKERRQ(ierr);
703   mtype = da->mattype;
704 
705   ierr = DMGetSection(da, &section);CHKERRQ(ierr);
706   if (section) {
707     PetscInt  bs = -1;
708     PetscInt  localSize;
709     PetscBool isShell, isBlock, isSeqBlock, isMPIBlock, isSymBlock, isSymSeqBlock, isSymMPIBlock, isSymmetric;
710 
711     ierr = DMGetGlobalSection(da, &sectionGlobal);CHKERRQ(ierr);
712     ierr = PetscSectionGetConstrainedStorageSize(sectionGlobal, &localSize);CHKERRQ(ierr);
713     ierr = MatCreate(PetscObjectComm((PetscObject)da),&A);CHKERRQ(ierr);
714     ierr = MatSetSizes(A,localSize,localSize,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
715     ierr = MatSetType(A,mtype);CHKERRQ(ierr);
716     ierr = PetscStrcmp(mtype,MATSHELL,&isShell);CHKERRQ(ierr);
717     ierr = PetscStrcmp(mtype,MATBAIJ,&isBlock);CHKERRQ(ierr);
718     ierr = PetscStrcmp(mtype,MATSEQBAIJ,&isSeqBlock);CHKERRQ(ierr);
719     ierr = PetscStrcmp(mtype,MATMPIBAIJ,&isMPIBlock);CHKERRQ(ierr);
720     ierr = PetscStrcmp(mtype,MATSBAIJ,&isSymBlock);CHKERRQ(ierr);
721     ierr = PetscStrcmp(mtype,MATSEQSBAIJ,&isSymSeqBlock);CHKERRQ(ierr);
722     ierr = PetscStrcmp(mtype,MATMPISBAIJ,&isSymMPIBlock);CHKERRQ(ierr);
723     /* Check for symmetric storage */
724     isSymmetric = (PetscBool) (isSymBlock || isSymSeqBlock || isSymMPIBlock);
725     if (isSymmetric) {
726       ierr = MatSetOption(*J, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE);CHKERRQ(ierr);
727     }
728     if (!isShell) {
729       PetscInt *dnz, *onz, *dnzu, *onzu, bsLocal;
730 
731       if (bs < 0) {
732         if (isBlock || isSeqBlock || isMPIBlock || isSymBlock || isSymSeqBlock || isSymMPIBlock) {
733           PetscInt pStart, pEnd, p, dof;
734 
735           ierr = PetscSectionGetChart(sectionGlobal, &pStart, &pEnd);CHKERRQ(ierr);
736           for (p = pStart; p < pEnd; ++p) {
737             ierr = PetscSectionGetDof(sectionGlobal, p, &dof);CHKERRQ(ierr);
738             if (dof) {
739               bs = dof;
740               break;
741             }
742           }
743         } else {
744           bs = 1;
745         }
746         /* Must have same blocksize on all procs (some might have no points) */
747         bsLocal = bs;
748         ierr    = MPIU_Allreduce(&bsLocal, &bs, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)da));CHKERRQ(ierr);
749       }
750       ierr = PetscCalloc4(localSize/bs, &dnz, localSize/bs, &onz, localSize/bs, &dnzu, localSize/bs, &onzu);CHKERRQ(ierr);
751       /* ierr = DMPlexPreallocateOperator(dm, bs, section, sectionGlobal, dnz, onz, dnzu, onzu, *J, fillMatrix);CHKERRQ(ierr); */
752       ierr = PetscFree4(dnz, onz, dnzu, onzu);CHKERRQ(ierr);
753     }
754   }
755   /*
756                                   m
757           ------------------------------------------------------
758          |                                                     |
759          |                                                     |
760          |               ----------------------                |
761          |               |                    |                |
762       n  |           ny  |                    |                |
763          |               |                    |                |
764          |               .---------------------                |
765          |             (xs,ys)     nx                          |
766          |            .                                        |
767          |         (gxs,gys)                                   |
768          |                                                     |
769           -----------------------------------------------------
770   */
771 
772   /*
773          nc - number of components per grid point
774          col - number of colors needed in one direction for single component problem
775 
776   */
777   M   = dd->M;
778   N   = dd->N;
779   P   = dd->P;
780   dim = da->dim;
781   dof = dd->w;
782   /* ierr = DMDAGetInfo(da,&dim,&M,&N,&P,0,0,0,&dof,0,0,0,0,0);CHKERRQ(ierr); */
783   ierr = DMDAGetCorners(da,0,0,0,&nx,&ny,&nz);CHKERRQ(ierr);
784   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
785   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
786   ierr = MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);CHKERRQ(ierr);
787   ierr = MatSetType(A,mtype);CHKERRQ(ierr);
788   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
789   ierr = MatSetDM(A,da);CHKERRQ(ierr);
790   if (da->structure_only) {
791     ierr = MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE);CHKERRQ(ierr);
792   }
793   ierr = MatGetType(A,&Atype);CHKERRQ(ierr);
794   /*
795      We do not provide a getmatrix function in the DMDA operations because
796    the basic DMDA does not know about matrices. We think of DMDA as being more
797    more low-level than matrices. This is kind of cheating but, cause sometimes
798    we think of DMDA has higher level than matrices.
799 
800      We could switch based on Atype (or mtype), but we do not since the
801    specialized setting routines depend only on the particular preallocation
802    details of the matrix, not the type itself.
803   */
804   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
805   if (!aij) {
806     ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
807   }
808   if (!aij) {
809     ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
810     if (!baij) {
811       ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
812     }
813     if (!baij) {
814       ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
815       if (!sbaij) {
816         ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
817       }
818       if (!sbaij) {
819         ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISELLSetPreallocation_C",&sell);CHKERRQ(ierr);
820         if (!sell) {
821           ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",&sell);CHKERRQ(ierr);
822         }
823       }
824       if (!sell) {
825         ierr = PetscObjectQueryFunction((PetscObject)A,"MatISSetPreallocation_C",&is);CHKERRQ(ierr);
826       }
827     }
828   }
829   if (aij) {
830     if (dim == 1) {
831       if (dd->ofill) {
832         ierr = DMCreateMatrix_DA_1d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
833       } else {
834         ierr = DMCreateMatrix_DA_1d_MPIAIJ(da,A);CHKERRQ(ierr);
835       }
836     } else if (dim == 2) {
837       if (dd->ofill) {
838         ierr = DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
839       } else {
840         ierr = DMCreateMatrix_DA_2d_MPIAIJ(da,A);CHKERRQ(ierr);
841       }
842     } else if (dim == 3) {
843       if (dd->ofill) {
844         ierr = DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
845       } else {
846         ierr = DMCreateMatrix_DA_3d_MPIAIJ(da,A);CHKERRQ(ierr);
847       }
848     }
849   } else if (baij) {
850     if (dim == 2) {
851       ierr = DMCreateMatrix_DA_2d_MPIBAIJ(da,A);CHKERRQ(ierr);
852     } else if (dim == 3) {
853       ierr = DMCreateMatrix_DA_3d_MPIBAIJ(da,A);CHKERRQ(ierr);
854     } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
855   } else if (sbaij) {
856     if (dim == 2) {
857       ierr = DMCreateMatrix_DA_2d_MPISBAIJ(da,A);CHKERRQ(ierr);
858     } else if (dim == 3) {
859       ierr = DMCreateMatrix_DA_3d_MPISBAIJ(da,A);CHKERRQ(ierr);
860     } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
861   } else if (sell) {
862      if (dim == 2) {
863        ierr = DMCreateMatrix_DA_2d_MPISELL(da,A);CHKERRQ(ierr);
864      } else if (dim == 3) {
865        ierr = DMCreateMatrix_DA_3d_MPISELL(da,A);CHKERRQ(ierr);
866      } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
867   } else if (is) {
868     ierr = DMCreateMatrix_DA_IS(da,A);CHKERRQ(ierr);
869   } else {
870     ISLocalToGlobalMapping ltog;
871 
872     ierr = MatSetBlockSize(A,dof);CHKERRQ(ierr);
873     ierr = MatSetUp(A);CHKERRQ(ierr);
874     ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
875     ierr = MatSetLocalToGlobalMapping(A,ltog,ltog);CHKERRQ(ierr);
876   }
877   ierr = DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);CHKERRQ(ierr);
878   ierr = MatSetStencil(A,dim,dims,starts,dof);CHKERRQ(ierr);
879   ierr = MatSetDM(A,da);CHKERRQ(ierr);
880   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
881   if (size > 1) {
882     /* change viewer to display matrix in natural ordering */
883     ierr = MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA);CHKERRQ(ierr);
884     ierr = MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA);CHKERRQ(ierr);
885   }
886   *J = A;
887   PetscFunctionReturn(0);
888 }
889 
890 /* ---------------------------------------------------------------------------------*/
891 PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
892 
893 PetscErrorCode DMCreateMatrix_DA_IS(DM dm,Mat J)
894 {
895   DM_DA                  *da = (DM_DA*)dm->data;
896   Mat                    lJ;
897   ISLocalToGlobalMapping ltog;
898   IS                     is_loc_filt, is_glob;
899   const PetscInt         *e_loc,*idx;
900   PetscInt               nel,nen,nv,dof,dim,*gidx,nb;
901   PetscBool              flg;
902   PetscErrorCode         ierr;
903 
904   /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
905      We need to filter the local indices that are represented through the DMDAGetElements decomposition
906      This is because the size of the local matrices in MATIS is the local size of the l2g map */
907   PetscFunctionBegin;
908   dof  = da->w;
909   dim  = dm->dim;
910 
911   ierr = MatSetBlockSize(J,dof);CHKERRQ(ierr);
912 
913   /* get local elements indices in local DMDA numbering */
914   ierr = DMDAGetElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
915   ierr = ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nel*nen,e_loc,PETSC_COPY_VALUES,&is_loc_filt);CHKERRQ(ierr);
916   ierr = DMDARestoreElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr);
917 
918   /* obtain a consistent local ordering for MATIS */
919   ierr = ISSortRemoveDups(is_loc_filt);CHKERRQ(ierr);
920   ierr = ISBlockGetLocalSize(is_loc_filt,&nb);CHKERRQ(ierr);
921   ierr = DMGetLocalToGlobalMapping(dm,&ltog);CHKERRQ(ierr);
922   ierr = ISLocalToGlobalMappingGetSize(ltog,&nv);CHKERRQ(ierr);
923   ierr = PetscMalloc1(PetscMax(nb,nv/dof),&gidx);CHKERRQ(ierr);
924   ierr = ISBlockGetIndices(is_loc_filt,&idx);CHKERRQ(ierr);
925   ierr = ISLocalToGlobalMappingApplyBlock(ltog,nb,idx,gidx);CHKERRQ(ierr);
926   ierr = ISBlockRestoreIndices(is_loc_filt,&idx);CHKERRQ(ierr);
927   ierr = ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nb,gidx,PETSC_USE_POINTER,&is_glob);CHKERRQ(ierr);
928   ierr = ISLocalToGlobalMappingCreateIS(is_glob,&ltog);CHKERRQ(ierr);
929   ierr = ISDestroy(&is_glob);CHKERRQ(ierr);
930   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
931   ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
932 
933   /* We also attach a l2g map to the local matrices to have MatSetValueLocal to work */
934   ierr = MatISGetLocalMat(J,&lJ);CHKERRQ(ierr);
935   ierr = ISLocalToGlobalMappingCreateIS(is_loc_filt,&ltog);CHKERRQ(ierr);
936   ierr = ISDestroy(&is_loc_filt);CHKERRQ(ierr);
937   ierr = ISCreateStride(PetscObjectComm((PetscObject)lJ),nv/dof,0,1,&is_glob);CHKERRQ(ierr);
938   ierr = ISGetIndices(is_glob,&idx);CHKERRQ(ierr);
939   ierr = ISGlobalToLocalMappingApplyBlock(ltog,IS_GTOLM_MASK,nv/dof,idx,&nb,gidx);CHKERRQ(ierr);
940   ierr = ISRestoreIndices(is_glob,&idx);CHKERRQ(ierr);
941   ierr = ISDestroy(&is_glob);CHKERRQ(ierr);
942   ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
943   ierr = ISCreateBlock(PETSC_COMM_SELF,dof,nb,gidx,PETSC_USE_POINTER,&is_loc_filt);CHKERRQ(ierr);
944   ierr = ISLocalToGlobalMappingCreateIS(is_loc_filt,&ltog);CHKERRQ(ierr);
945   ierr = ISDestroy(&is_loc_filt);CHKERRQ(ierr);
946   ierr = MatSetLocalToGlobalMapping(lJ,ltog,ltog);CHKERRQ(ierr);
947   ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
948   ierr = PetscFree(gidx);CHKERRQ(ierr);
949 
950   /* Preallocation (not exact): we reuse the preallocation routines of the assembled version  */
951   flg = dm->prealloc_only;
952   dm->prealloc_only = PETSC_TRUE;
953   switch (dim) {
954   case 1:
955     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr);
956     ierr = DMCreateMatrix_DA_1d_MPIAIJ(dm,J);CHKERRQ(ierr);
957     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
958     break;
959   case 2:
960     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr);
961     ierr = DMCreateMatrix_DA_2d_MPIAIJ(dm,J);CHKERRQ(ierr);
962     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
963     break;
964   case 3:
965     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr);
966     ierr = DMCreateMatrix_DA_3d_MPIAIJ(dm,J);CHKERRQ(ierr);
967     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
968     break;
969   default:
970     SETERRQ1(PetscObjectComm((PetscObject)dm),PETSC_ERR_SUP,"Unhandled dimension %d",dim);
971     break;
972   }
973   dm->prealloc_only = flg;
974   PetscFunctionReturn(0);
975 }
976 
977 PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da,Mat J)
978 {
979   PetscErrorCode         ierr;
980   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p;
981   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
982   MPI_Comm               comm;
983   PetscScalar            *values;
984   DMBoundaryType         bx,by;
985   ISLocalToGlobalMapping ltog;
986   DMDAStencilType        st;
987 
988   PetscFunctionBegin;
989   /*
990          nc - number of components per grid point
991          col - number of colors needed in one direction for single component problem
992 
993   */
994   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
995   col  = 2*s + 1;
996   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
997   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
998   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
999 
1000   ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr);
1001   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1002 
1003   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1004   /* determine the matrix preallocation information */
1005   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
1006   for (i=xs; i<xs+nx; i++) {
1007 
1008     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1009     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1010 
1011     for (j=ys; j<ys+ny; j++) {
1012       slot = i - gxs + gnx*(j - gys);
1013 
1014       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1015       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1016 
1017       cnt = 0;
1018       for (k=0; k<nc; k++) {
1019         for (l=lstart; l<lend+1; l++) {
1020           for (p=pstart; p<pend+1; p++) {
1021             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1022               cols[cnt++] = k + nc*(slot + gnx*l + p);
1023             }
1024           }
1025         }
1026         rows[k] = k + nc*(slot);
1027       }
1028       ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1029     }
1030   }
1031   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1032   ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1033   ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1034   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1035 
1036   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1037 
1038   /*
1039     For each node in the grid: we get the neighbors in the local (on processor ordering
1040     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1041     PETSc ordering.
1042   */
1043   if (!da->prealloc_only) {
1044     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
1045     for (i=xs; i<xs+nx; i++) {
1046 
1047       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1048       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1049 
1050       for (j=ys; j<ys+ny; j++) {
1051         slot = i - gxs + gnx*(j - gys);
1052 
1053         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1054         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1055 
1056         cnt = 0;
1057         for (k=0; k<nc; k++) {
1058           for (l=lstart; l<lend+1; l++) {
1059             for (p=pstart; p<pend+1; p++) {
1060               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1061                 cols[cnt++] = k + nc*(slot + gnx*l + p);
1062               }
1063             }
1064           }
1065           rows[k] = k + nc*(slot);
1066         }
1067         ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1068       }
1069     }
1070     ierr = PetscFree(values);CHKERRQ(ierr);
1071     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1072     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1073     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1074   }
1075   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1076   PetscFunctionReturn(0);
1077 }
1078 
1079 PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da,Mat J)
1080 {
1081   PetscErrorCode         ierr;
1082   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1083   PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
1084   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
1085   MPI_Comm               comm;
1086   PetscScalar            *values;
1087   DMBoundaryType         bx,by,bz;
1088   ISLocalToGlobalMapping ltog;
1089   DMDAStencilType        st;
1090 
1091   PetscFunctionBegin;
1092   /*
1093          nc - number of components per grid point
1094          col - number of colors needed in one direction for single component problem
1095 
1096   */
1097   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1098   col  = 2*s + 1;
1099   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1100   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1101   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1102 
1103   ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr);
1104   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1105 
1106   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1107   /* determine the matrix preallocation information */
1108   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1109   for (i=xs; i<xs+nx; i++) {
1110     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1111     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1112     for (j=ys; j<ys+ny; j++) {
1113       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1114       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1115       for (k=zs; k<zs+nz; k++) {
1116         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1117         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1118 
1119         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1120 
1121         cnt = 0;
1122         for (l=0; l<nc; l++) {
1123           for (ii=istart; ii<iend+1; ii++) {
1124             for (jj=jstart; jj<jend+1; jj++) {
1125               for (kk=kstart; kk<kend+1; kk++) {
1126                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1127                   cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1128                 }
1129               }
1130             }
1131           }
1132           rows[l] = l + nc*(slot);
1133         }
1134         ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1135       }
1136     }
1137   }
1138   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1139   ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1140   ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1141   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1142   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1143 
1144   /*
1145     For each node in the grid: we get the neighbors in the local (on processor ordering
1146     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1147     PETSc ordering.
1148   */
1149   if (!da->prealloc_only) {
1150     ierr = PetscCalloc1(col*col*col*nc*nc*nc,&values);CHKERRQ(ierr);
1151     for (i=xs; i<xs+nx; i++) {
1152       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1153       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1154       for (j=ys; j<ys+ny; j++) {
1155         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1156         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1157         for (k=zs; k<zs+nz; k++) {
1158           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1159           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1160 
1161           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1162 
1163           cnt = 0;
1164           for (l=0; l<nc; l++) {
1165             for (ii=istart; ii<iend+1; ii++) {
1166               for (jj=jstart; jj<jend+1; jj++) {
1167                 for (kk=kstart; kk<kend+1; kk++) {
1168                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1169                     cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1170                   }
1171                 }
1172               }
1173             }
1174             rows[l] = l + nc*(slot);
1175           }
1176           ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1177         }
1178       }
1179     }
1180     ierr = PetscFree(values);CHKERRQ(ierr);
1181     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1182     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1183     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1184   }
1185   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1186   PetscFunctionReturn(0);
1187 }
1188 
1189 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J)
1190 {
1191   PetscErrorCode         ierr;
1192   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,M,N;
1193   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
1194   MPI_Comm               comm;
1195   PetscScalar            *values;
1196   DMBoundaryType         bx,by;
1197   ISLocalToGlobalMapping ltog,mltog;
1198   DMDAStencilType        st;
1199   PetscBool              removedups = PETSC_FALSE;
1200 
1201   PetscFunctionBegin;
1202   /*
1203          nc - number of components per grid point
1204          col - number of colors needed in one direction for single component problem
1205 
1206   */
1207   ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1208   col  = 2*s + 1;
1209   /*
1210        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1211        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1212   */
1213   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1214   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1215   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1216   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1217   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1218 
1219   ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr);
1220   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1221 
1222   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1223   /* determine the matrix preallocation information */
1224   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
1225   for (i=xs; i<xs+nx; i++) {
1226 
1227     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1228     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1229 
1230     for (j=ys; j<ys+ny; j++) {
1231       slot = i - gxs + gnx*(j - gys);
1232 
1233       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1234       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1235 
1236       cnt = 0;
1237       for (k=0; k<nc; k++) {
1238         for (l=lstart; l<lend+1; l++) {
1239           for (p=pstart; p<pend+1; p++) {
1240             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1241               cols[cnt++] = k + nc*(slot + gnx*l + p);
1242             }
1243           }
1244         }
1245         rows[k] = k + nc*(slot);
1246       }
1247       if (removedups) {
1248         ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1249       } else {
1250         ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1251       }
1252     }
1253   }
1254   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1255   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1256   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1257   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1258   ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
1259   if (!mltog) {
1260     ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1261   }
1262 
1263   /*
1264     For each node in the grid: we get the neighbors in the local (on processor ordering
1265     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1266     PETSc ordering.
1267   */
1268   if (!da->prealloc_only) {
1269     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
1270     for (i=xs; i<xs+nx; i++) {
1271 
1272       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1273       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1274 
1275       for (j=ys; j<ys+ny; j++) {
1276         slot = i - gxs + gnx*(j - gys);
1277 
1278         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1279         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1280 
1281         cnt = 0;
1282         for (k=0; k<nc; k++) {
1283           for (l=lstart; l<lend+1; l++) {
1284             for (p=pstart; p<pend+1; p++) {
1285               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1286                 cols[cnt++] = k + nc*(slot + gnx*l + p);
1287               }
1288             }
1289           }
1290           rows[k] = k + nc*(slot);
1291         }
1292         ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1293       }
1294     }
1295     ierr = PetscFree(values);CHKERRQ(ierr);
1296     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1297     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1298     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1299   }
1300   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1301   PetscFunctionReturn(0);
1302 }
1303 
1304 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)
1305 {
1306   PetscErrorCode         ierr;
1307   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1308   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,maxcnt = 0,l,p,M,N;
1309   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
1310   DM_DA                  *dd = (DM_DA*)da->data;
1311   PetscInt               ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
1312   MPI_Comm               comm;
1313   PetscScalar            *values;
1314   DMBoundaryType         bx,by;
1315   ISLocalToGlobalMapping ltog;
1316   DMDAStencilType        st;
1317   PetscBool              removedups = PETSC_FALSE;
1318 
1319   PetscFunctionBegin;
1320   /*
1321          nc - number of components per grid point
1322          col - number of colors needed in one direction for single component problem
1323 
1324   */
1325   ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1326   col  = 2*s + 1;
1327   /*
1328        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1329        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1330   */
1331   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1332   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1333   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1334   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1335   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1336 
1337   ierr = PetscMalloc1(col*col*nc,&cols);CHKERRQ(ierr);
1338   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1339 
1340   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1341   /* determine the matrix preallocation information */
1342   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
1343   for (i=xs; i<xs+nx; i++) {
1344 
1345     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1346     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1347 
1348     for (j=ys; j<ys+ny; j++) {
1349       slot = i - gxs + gnx*(j - gys);
1350 
1351       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1352       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1353 
1354       for (k=0; k<nc; k++) {
1355         cnt = 0;
1356         for (l=lstart; l<lend+1; l++) {
1357           for (p=pstart; p<pend+1; p++) {
1358             if (l || p) {
1359               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
1360                 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
1361               }
1362             } else {
1363               if (dfill) {
1364                 for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1365               } else {
1366                 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1367               }
1368             }
1369           }
1370         }
1371         row    = k + nc*(slot);
1372         maxcnt = PetscMax(maxcnt,cnt);
1373         if (removedups) {
1374           ierr   = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1375         } else {
1376           ierr   = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1377         }
1378       }
1379     }
1380   }
1381   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1382   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1383   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1384   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1385 
1386   /*
1387     For each node in the grid: we get the neighbors in the local (on processor ordering
1388     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1389     PETSc ordering.
1390   */
1391   if (!da->prealloc_only) {
1392     ierr = PetscCalloc1(maxcnt,&values);CHKERRQ(ierr);
1393     for (i=xs; i<xs+nx; i++) {
1394 
1395       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1396       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1397 
1398       for (j=ys; j<ys+ny; j++) {
1399         slot = i - gxs + gnx*(j - gys);
1400 
1401         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1402         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1403 
1404         for (k=0; k<nc; k++) {
1405           cnt = 0;
1406           for (l=lstart; l<lend+1; l++) {
1407             for (p=pstart; p<pend+1; p++) {
1408               if (l || p) {
1409                 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
1410                   for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
1411                 }
1412               } else {
1413                 if (dfill) {
1414                   for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1415                 } else {
1416                   for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1417                 }
1418               }
1419             }
1420           }
1421           row  = k + nc*(slot);
1422           ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1423         }
1424       }
1425     }
1426     ierr = PetscFree(values);CHKERRQ(ierr);
1427     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1428     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1429     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1430   }
1431   ierr = PetscFree(cols);CHKERRQ(ierr);
1432   PetscFunctionReturn(0);
1433 }
1434 
1435 /* ---------------------------------------------------------------------------------*/
1436 
1437 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J)
1438 {
1439   PetscErrorCode         ierr;
1440   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1441   PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
1442   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
1443   MPI_Comm               comm;
1444   PetscScalar            *values;
1445   DMBoundaryType         bx,by,bz;
1446   ISLocalToGlobalMapping ltog,mltog;
1447   DMDAStencilType        st;
1448   PetscBool              removedups = PETSC_FALSE;
1449 
1450   PetscFunctionBegin;
1451   /*
1452          nc - number of components per grid point
1453          col - number of colors needed in one direction for single component problem
1454 
1455   */
1456   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1457   col  = 2*s + 1;
1458 
1459   /*
1460        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1461        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1462   */
1463   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1464   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1465   if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;
1466 
1467   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1468   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1469   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1470 
1471   ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr);
1472   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1473 
1474   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1475   /* determine the matrix preallocation information */
1476   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1477   for (i=xs; i<xs+nx; i++) {
1478     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1479     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1480     for (j=ys; j<ys+ny; j++) {
1481       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1482       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1483       for (k=zs; k<zs+nz; k++) {
1484         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1485         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1486 
1487         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1488 
1489         cnt = 0;
1490         for (l=0; l<nc; l++) {
1491           for (ii=istart; ii<iend+1; ii++) {
1492             for (jj=jstart; jj<jend+1; jj++) {
1493               for (kk=kstart; kk<kend+1; kk++) {
1494                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1495                   cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1496                 }
1497               }
1498             }
1499           }
1500           rows[l] = l + nc*(slot);
1501         }
1502         if (removedups) {
1503           ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1504         } else {
1505           ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1506         }
1507       }
1508     }
1509   }
1510   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1511   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1512   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1513   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1514   ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
1515   if (!mltog) {
1516     ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1517   }
1518 
1519   /*
1520     For each node in the grid: we get the neighbors in the local (on processor ordering
1521     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1522     PETSc ordering.
1523   */
1524   if (!da->prealloc_only) {
1525     ierr = PetscCalloc1(col*col*col*nc*nc*nc,&values);CHKERRQ(ierr);
1526     for (i=xs; i<xs+nx; i++) {
1527       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1528       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1529       for (j=ys; j<ys+ny; j++) {
1530         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1531         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1532         for (k=zs; k<zs+nz; k++) {
1533           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1534           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1535 
1536           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1537 
1538           cnt = 0;
1539           for (l=0; l<nc; l++) {
1540             for (ii=istart; ii<iend+1; ii++) {
1541               for (jj=jstart; jj<jend+1; jj++) {
1542                 for (kk=kstart; kk<kend+1; kk++) {
1543                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1544                     cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1545                   }
1546                 }
1547               }
1548             }
1549             rows[l] = l + nc*(slot);
1550           }
1551           ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1552         }
1553       }
1554     }
1555     ierr = PetscFree(values);CHKERRQ(ierr);
1556     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1557     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1558     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1559   }
1560   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1561   PetscFunctionReturn(0);
1562 }
1563 
1564 /* ---------------------------------------------------------------------------------*/
1565 
1566 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J)
1567 {
1568   PetscErrorCode         ierr;
1569   DM_DA                  *dd = (DM_DA*)da->data;
1570   PetscInt               xs,nx,i,j,gxs,gnx,row,k,l;
1571   PetscInt               m,dim,s,*cols = NULL,nc,cnt,maxcnt = 0,*ocols;
1572   PetscInt               *ofill = dd->ofill,*dfill = dd->dfill;
1573   PetscScalar            *values;
1574   DMBoundaryType         bx;
1575   ISLocalToGlobalMapping ltog;
1576   PetscMPIInt            rank,size;
1577 
1578   PetscFunctionBegin;
1579   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)da),&rank);CHKERRQ(ierr);
1580   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRQ(ierr);
1581 
1582   /*
1583          nc - number of components per grid point
1584 
1585   */
1586   ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
1587   if (s > 1) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Matrix creation for 1d not implemented correctly for stencil width larger than 1");
1588   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
1589   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
1590 
1591   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1592   ierr = PetscCalloc2(nx*nc,&cols,nx*nc,&ocols);CHKERRQ(ierr);
1593 
1594   /*
1595         note should be smaller for first and last process with no periodic
1596         does not handle dfill
1597   */
1598   cnt = 0;
1599   /* coupling with process to the left */
1600   for (i=0; i<s; i++) {
1601     for (j=0; j<nc; j++) {
1602       ocols[cnt] = ((!rank) ? 0 : (s - i)*(ofill[j+1] - ofill[j]));
1603       cols[cnt]  = dfill[j+1] - dfill[j] + (s + i)*(ofill[j+1] - ofill[j]);
1604       if (!rank && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1605         if (size > 1) ocols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
1606         else cols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
1607       }
1608       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1609       cnt++;
1610     }
1611   }
1612   for (i=s; i<nx-s; i++) {
1613     for (j=0; j<nc; j++) {
1614       cols[cnt] = dfill[j+1] - dfill[j] + 2*s*(ofill[j+1] - ofill[j]);
1615       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1616       cnt++;
1617     }
1618   }
1619   /* coupling with process to the right */
1620   for (i=nx-s; i<nx; i++) {
1621     for (j=0; j<nc; j++) {
1622       ocols[cnt] = ((rank == (size-1)) ? 0 : (i - nx + s + 1)*(ofill[j+1] - ofill[j]));
1623       cols[cnt]  = dfill[j+1] - dfill[j] + (s + nx - i - 1)*(ofill[j+1] - ofill[j]);
1624       if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1625         if (size > 1) ocols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
1626         else cols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
1627       }
1628       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1629       cnt++;
1630     }
1631   }
1632 
1633   ierr = MatSeqAIJSetPreallocation(J,0,cols);CHKERRQ(ierr);
1634   ierr = MatMPIAIJSetPreallocation(J,0,cols,0,ocols);CHKERRQ(ierr);
1635   ierr = PetscFree2(cols,ocols);CHKERRQ(ierr);
1636 
1637   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1638   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1639 
1640   /*
1641     For each node in the grid: we get the neighbors in the local (on processor ordering
1642     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1643     PETSc ordering.
1644   */
1645   if (!da->prealloc_only) {
1646     ierr = PetscCalloc2(maxcnt,&values,maxcnt,&cols);CHKERRQ(ierr);
1647 
1648     row = xs*nc;
1649     /* coupling with process to the left */
1650     for (i=xs; i<xs+s; i++) {
1651       for (j=0; j<nc; j++) {
1652         cnt = 0;
1653         if (rank) {
1654           for (l=0; l<s; l++) {
1655             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1656           }
1657         }
1658         if (!rank && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1659           for (l=0; l<s; l++) {
1660             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (m + i - s - l)*nc + ofill[k];
1661           }
1662         }
1663         if (dfill) {
1664           for (k=dfill[j]; k<dfill[j+1]; k++) {
1665             cols[cnt++] = i*nc + dfill[k];
1666           }
1667         } else {
1668           for (k=0; k<nc; k++) {
1669             cols[cnt++] = i*nc + k;
1670           }
1671         }
1672         for (l=0; l<s; l++) {
1673           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1674         }
1675         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1676         row++;
1677       }
1678     }
1679     for (i=xs+s; i<xs+nx-s; i++) {
1680       for (j=0; j<nc; j++) {
1681         cnt = 0;
1682         for (l=0; l<s; l++) {
1683           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1684         }
1685         if (dfill) {
1686           for (k=dfill[j]; k<dfill[j+1]; k++) {
1687             cols[cnt++] = i*nc + dfill[k];
1688           }
1689         } else {
1690           for (k=0; k<nc; k++) {
1691             cols[cnt++] = i*nc + k;
1692           }
1693         }
1694         for (l=0; l<s; l++) {
1695           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1696         }
1697         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1698         row++;
1699       }
1700     }
1701     /* coupling with process to the right */
1702     for (i=xs+nx-s; i<xs+nx; i++) {
1703       for (j=0; j<nc; j++) {
1704         cnt = 0;
1705         for (l=0; l<s; l++) {
1706           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1707         }
1708         if (dfill) {
1709           for (k=dfill[j]; k<dfill[j+1]; k++) {
1710             cols[cnt++] = i*nc + dfill[k];
1711           }
1712         } else {
1713           for (k=0; k<nc; k++) {
1714             cols[cnt++] = i*nc + k;
1715           }
1716         }
1717         if (rank < size-1) {
1718           for (l=0; l<s; l++) {
1719             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1720           }
1721         }
1722         if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1723           for (l=0; l<s; l++) {
1724             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s - l - m + 2)*nc + ofill[k];
1725           }
1726         }
1727         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1728         row++;
1729       }
1730     }
1731     ierr = PetscFree2(values,cols);CHKERRQ(ierr);
1732     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1733     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1734     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1735   }
1736   PetscFunctionReturn(0);
1737 }
1738 
1739 /* ---------------------------------------------------------------------------------*/
1740 
1741 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J)
1742 {
1743   PetscErrorCode         ierr;
1744   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1745   PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
1746   PetscInt               istart,iend;
1747   PetscScalar            *values;
1748   DMBoundaryType         bx;
1749   ISLocalToGlobalMapping ltog,mltog;
1750 
1751   PetscFunctionBegin;
1752   /*
1753          nc - number of components per grid point
1754          col - number of colors needed in one direction for single component problem
1755 
1756   */
1757   ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
1758   col  = 2*s + 1;
1759 
1760   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
1761   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
1762 
1763   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1764   ierr = MatSeqAIJSetPreallocation(J,col*nc,0);CHKERRQ(ierr);
1765   ierr = MatMPIAIJSetPreallocation(J,col*nc,0,col*nc,0);CHKERRQ(ierr);
1766 
1767   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1768   ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
1769   if (!mltog) {
1770     ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1771   }
1772 
1773   /*
1774     For each node in the grid: we get the neighbors in the local (on processor ordering
1775     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1776     PETSc ordering.
1777   */
1778   if (!da->prealloc_only) {
1779     ierr = PetscMalloc2(nc,&rows,col*nc*nc,&cols);CHKERRQ(ierr);
1780     ierr = PetscCalloc1(col*nc*nc,&values);CHKERRQ(ierr);
1781     for (i=xs; i<xs+nx; i++) {
1782       istart = PetscMax(-s,gxs - i);
1783       iend   = PetscMin(s,gxs + gnx - i - 1);
1784       slot   = i - gxs;
1785 
1786       cnt = 0;
1787       for (l=0; l<nc; l++) {
1788         for (i1=istart; i1<iend+1; i1++) {
1789           cols[cnt++] = l + nc*(slot + i1);
1790         }
1791         rows[l] = l + nc*(slot);
1792       }
1793       ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1794     }
1795     ierr = PetscFree(values);CHKERRQ(ierr);
1796     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1797     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1798     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1799     ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1800   }
1801   PetscFunctionReturn(0);
1802 }
1803 
1804 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
1805 {
1806   PetscErrorCode         ierr;
1807   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1808   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1809   PetscInt               istart,iend,jstart,jend,ii,jj;
1810   MPI_Comm               comm;
1811   PetscScalar            *values;
1812   DMBoundaryType         bx,by;
1813   DMDAStencilType        st;
1814   ISLocalToGlobalMapping ltog;
1815 
1816   PetscFunctionBegin;
1817   /*
1818      nc - number of components per grid point
1819      col - number of colors needed in one direction for single component problem
1820   */
1821   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1822   col  = 2*s + 1;
1823 
1824   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1825   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1826   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1827 
1828   ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr);
1829 
1830   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1831 
1832   /* determine the matrix preallocation information */
1833   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1834   for (i=xs; i<xs+nx; i++) {
1835     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1836     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1837     for (j=ys; j<ys+ny; j++) {
1838       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1839       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1840       slot   = i - gxs + gnx*(j - gys);
1841 
1842       /* Find block columns in block row */
1843       cnt = 0;
1844       for (ii=istart; ii<iend+1; ii++) {
1845         for (jj=jstart; jj<jend+1; jj++) {
1846           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1847             cols[cnt++] = slot + ii + gnx*jj;
1848           }
1849         }
1850       }
1851       ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1852     }
1853   }
1854   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1855   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1856   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1857 
1858   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1859 
1860   /*
1861     For each node in the grid: we get the neighbors in the local (on processor ordering
1862     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1863     PETSc ordering.
1864   */
1865   if (!da->prealloc_only) {
1866     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
1867     for (i=xs; i<xs+nx; i++) {
1868       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1869       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1870       for (j=ys; j<ys+ny; j++) {
1871         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1872         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1873         slot = i - gxs + gnx*(j - gys);
1874         cnt  = 0;
1875         for (ii=istart; ii<iend+1; ii++) {
1876           for (jj=jstart; jj<jend+1; jj++) {
1877             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1878               cols[cnt++] = slot + ii + gnx*jj;
1879             }
1880           }
1881         }
1882         ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1883       }
1884     }
1885     ierr = PetscFree(values);CHKERRQ(ierr);
1886     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1887     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1888     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1889   }
1890   ierr = PetscFree(cols);CHKERRQ(ierr);
1891   PetscFunctionReturn(0);
1892 }
1893 
1894 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
1895 {
1896   PetscErrorCode         ierr;
1897   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1898   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1899   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1900   MPI_Comm               comm;
1901   PetscScalar            *values;
1902   DMBoundaryType         bx,by,bz;
1903   DMDAStencilType        st;
1904   ISLocalToGlobalMapping ltog;
1905 
1906   PetscFunctionBegin;
1907   /*
1908          nc - number of components per grid point
1909          col - number of colors needed in one direction for single component problem
1910 
1911   */
1912   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1913   col  = 2*s + 1;
1914 
1915   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1916   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1917   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1918 
1919   ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr);
1920 
1921   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1922 
1923   /* determine the matrix preallocation information */
1924   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1925   for (i=xs; i<xs+nx; i++) {
1926     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1927     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1928     for (j=ys; j<ys+ny; j++) {
1929       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1930       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1931       for (k=zs; k<zs+nz; k++) {
1932         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1933         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1934 
1935         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1936 
1937         /* Find block columns in block row */
1938         cnt = 0;
1939         for (ii=istart; ii<iend+1; ii++) {
1940           for (jj=jstart; jj<jend+1; jj++) {
1941             for (kk=kstart; kk<kend+1; kk++) {
1942               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1943                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1944               }
1945             }
1946           }
1947         }
1948         ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1949       }
1950     }
1951   }
1952   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1953   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1954   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1955 
1956   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1957 
1958   /*
1959     For each node in the grid: we get the neighbors in the local (on processor ordering
1960     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1961     PETSc ordering.
1962   */
1963   if (!da->prealloc_only) {
1964     ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr);
1965     for (i=xs; i<xs+nx; i++) {
1966       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1967       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1968       for (j=ys; j<ys+ny; j++) {
1969         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1970         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1971         for (k=zs; k<zs+nz; k++) {
1972           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1973           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1974 
1975           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1976 
1977           cnt = 0;
1978           for (ii=istart; ii<iend+1; ii++) {
1979             for (jj=jstart; jj<jend+1; jj++) {
1980               for (kk=kstart; kk<kend+1; kk++) {
1981                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1982                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1983                 }
1984               }
1985             }
1986           }
1987           ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1988         }
1989       }
1990     }
1991     ierr = PetscFree(values);CHKERRQ(ierr);
1992     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1993     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1994     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1995   }
1996   ierr = PetscFree(cols);CHKERRQ(ierr);
1997   PetscFunctionReturn(0);
1998 }
1999 
2000 /*
2001   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
2002   identify in the local ordering with periodic domain.
2003 */
2004 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[])
2005 {
2006   PetscErrorCode ierr;
2007   PetscInt       i,n;
2008 
2009   PetscFunctionBegin;
2010   ierr = ISLocalToGlobalMappingApplyBlock(ltog,1,row,row);CHKERRQ(ierr);
2011   ierr = ISLocalToGlobalMappingApplyBlock(ltog,*cnt,col,col);CHKERRQ(ierr);
2012   for (i=0,n=0; i<*cnt; i++) {
2013     if (col[i] >= *row) col[n++] = col[i];
2014   }
2015   *cnt = n;
2016   PetscFunctionReturn(0);
2017 }
2018 
2019 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J)
2020 {
2021   PetscErrorCode         ierr;
2022   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2023   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
2024   PetscInt               istart,iend,jstart,jend,ii,jj;
2025   MPI_Comm               comm;
2026   PetscScalar            *values;
2027   DMBoundaryType         bx,by;
2028   DMDAStencilType        st;
2029   ISLocalToGlobalMapping ltog;
2030 
2031   PetscFunctionBegin;
2032   /*
2033      nc - number of components per grid point
2034      col - number of colors needed in one direction for single component problem
2035   */
2036   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
2037   col  = 2*s + 1;
2038 
2039   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
2040   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
2041   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
2042 
2043   ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr);
2044 
2045   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
2046 
2047   /* determine the matrix preallocation information */
2048   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
2049   for (i=xs; i<xs+nx; i++) {
2050     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2051     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2052     for (j=ys; j<ys+ny; j++) {
2053       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2054       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2055       slot   = i - gxs + gnx*(j - gys);
2056 
2057       /* Find block columns in block row */
2058       cnt = 0;
2059       for (ii=istart; ii<iend+1; ii++) {
2060         for (jj=jstart; jj<jend+1; jj++) {
2061           if (st == DMDA_STENCIL_BOX || !ii || !jj) {
2062             cols[cnt++] = slot + ii + gnx*jj;
2063           }
2064         }
2065       }
2066       ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2067       ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
2068     }
2069   }
2070   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
2071   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
2072   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2073 
2074   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
2075 
2076   /*
2077     For each node in the grid: we get the neighbors in the local (on processor ordering
2078     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2079     PETSc ordering.
2080   */
2081   if (!da->prealloc_only) {
2082     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
2083     for (i=xs; i<xs+nx; i++) {
2084       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2085       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2086       for (j=ys; j<ys+ny; j++) {
2087         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2088         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2089         slot   = i - gxs + gnx*(j - gys);
2090 
2091         /* Find block columns in block row */
2092         cnt = 0;
2093         for (ii=istart; ii<iend+1; ii++) {
2094           for (jj=jstart; jj<jend+1; jj++) {
2095             if (st == DMDA_STENCIL_BOX || !ii || !jj) {
2096               cols[cnt++] = slot + ii + gnx*jj;
2097             }
2098           }
2099         }
2100         ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2101         ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2102       }
2103     }
2104     ierr = PetscFree(values);CHKERRQ(ierr);
2105     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2106     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2107     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2108   }
2109   ierr = PetscFree(cols);CHKERRQ(ierr);
2110   PetscFunctionReturn(0);
2111 }
2112 
2113 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J)
2114 {
2115   PetscErrorCode         ierr;
2116   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2117   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
2118   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
2119   MPI_Comm               comm;
2120   PetscScalar            *values;
2121   DMBoundaryType         bx,by,bz;
2122   DMDAStencilType        st;
2123   ISLocalToGlobalMapping ltog;
2124 
2125   PetscFunctionBegin;
2126   /*
2127      nc - number of components per grid point
2128      col - number of colors needed in one direction for single component problem
2129   */
2130   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
2131   col  = 2*s + 1;
2132 
2133   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
2134   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
2135   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
2136 
2137   /* create the matrix */
2138   ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr);
2139 
2140   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
2141 
2142   /* determine the matrix preallocation information */
2143   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
2144   for (i=xs; i<xs+nx; i++) {
2145     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2146     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2147     for (j=ys; j<ys+ny; j++) {
2148       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2149       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2150       for (k=zs; k<zs+nz; k++) {
2151         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2152         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2153 
2154         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2155 
2156         /* Find block columns in block row */
2157         cnt = 0;
2158         for (ii=istart; ii<iend+1; ii++) {
2159           for (jj=jstart; jj<jend+1; jj++) {
2160             for (kk=kstart; kk<kend+1; kk++) {
2161               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
2162                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
2163               }
2164             }
2165           }
2166         }
2167         ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2168         ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
2169       }
2170     }
2171   }
2172   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
2173   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
2174   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2175 
2176   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
2177 
2178   /*
2179     For each node in the grid: we get the neighbors in the local (on processor ordering
2180     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2181     PETSc ordering.
2182   */
2183   if (!da->prealloc_only) {
2184     ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr);
2185     for (i=xs; i<xs+nx; i++) {
2186       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2187       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2188       for (j=ys; j<ys+ny; j++) {
2189         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2190         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2191         for (k=zs; k<zs+nz; k++) {
2192           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2193           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2194 
2195           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2196 
2197           cnt = 0;
2198           for (ii=istart; ii<iend+1; ii++) {
2199             for (jj=jstart; jj<jend+1; jj++) {
2200               for (kk=kstart; kk<kend+1; kk++) {
2201                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
2202                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
2203                 }
2204               }
2205             }
2206           }
2207           ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2208           ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2209         }
2210       }
2211     }
2212     ierr = PetscFree(values);CHKERRQ(ierr);
2213     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2214     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2215     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2216   }
2217   ierr = PetscFree(cols);CHKERRQ(ierr);
2218   PetscFunctionReturn(0);
2219 }
2220 
2221 /* ---------------------------------------------------------------------------------*/
2222 
2223 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J)
2224 {
2225   PetscErrorCode         ierr;
2226   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2227   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt, maxcnt = 0,l,p,*dnz,*onz;
2228   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
2229   DM_DA                  *dd = (DM_DA*)da->data;
2230   PetscInt               ifill_col,*dfill = dd->dfill,*ofill = dd->ofill;
2231   MPI_Comm               comm;
2232   PetscScalar            *values;
2233   DMBoundaryType         bx,by,bz;
2234   ISLocalToGlobalMapping ltog;
2235   DMDAStencilType        st;
2236   PetscBool              removedups = PETSC_FALSE;
2237 
2238   PetscFunctionBegin;
2239   /*
2240          nc - number of components per grid point
2241          col - number of colors needed in one direction for single component problem
2242 
2243   */
2244   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
2245   col  = 2*s + 1;
2246   if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
2247                  by 2*stencil_width + 1\n");
2248   if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
2249                  by 2*stencil_width + 1\n");
2250   if (bz == DM_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
2251                  by 2*stencil_width + 1\n");
2252 
2253   /*
2254        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
2255        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
2256   */
2257   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
2258   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
2259   if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;
2260 
2261   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
2262   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
2263   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
2264 
2265   ierr = PetscMalloc1(col*col*col*nc,&cols);CHKERRQ(ierr);
2266   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
2267 
2268   /* determine the matrix preallocation information */
2269   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
2270 
2271   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
2272   for (i=xs; i<xs+nx; i++) {
2273     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2274     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2275     for (j=ys; j<ys+ny; j++) {
2276       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2277       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2278       for (k=zs; k<zs+nz; k++) {
2279         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2280         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2281 
2282         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2283 
2284         for (l=0; l<nc; l++) {
2285           cnt = 0;
2286           for (ii=istart; ii<iend+1; ii++) {
2287             for (jj=jstart; jj<jend+1; jj++) {
2288               for (kk=kstart; kk<kend+1; kk++) {
2289                 if (ii || jj || kk) {
2290                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2291                     for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2292                   }
2293                 } else {
2294                   if (dfill) {
2295                     for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2296                   } else {
2297                     for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2298                   }
2299                 }
2300               }
2301             }
2302           }
2303           row  = l + nc*(slot);
2304           maxcnt = PetscMax(maxcnt,cnt);
2305           if (removedups) {
2306             ierr = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
2307           } else {
2308             ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
2309           }
2310         }
2311       }
2312     }
2313   }
2314   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
2315   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
2316   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2317   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
2318 
2319   /*
2320     For each node in the grid: we get the neighbors in the local (on processor ordering
2321     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2322     PETSc ordering.
2323   */
2324   if (!da->prealloc_only) {
2325     ierr = PetscCalloc1(maxcnt,&values);CHKERRQ(ierr);
2326     for (i=xs; i<xs+nx; i++) {
2327       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2328       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2329       for (j=ys; j<ys+ny; j++) {
2330         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2331         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2332         for (k=zs; k<zs+nz; k++) {
2333           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2334           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2335 
2336           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2337 
2338           for (l=0; l<nc; l++) {
2339             cnt = 0;
2340             for (ii=istart; ii<iend+1; ii++) {
2341               for (jj=jstart; jj<jend+1; jj++) {
2342                 for (kk=kstart; kk<kend+1; kk++) {
2343                   if (ii || jj || kk) {
2344                     if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2345                       for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2346                     }
2347                   } else {
2348                     if (dfill) {
2349                       for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2350                     } else {
2351                       for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2352                     }
2353                   }
2354                 }
2355               }
2356             }
2357             row  = l + nc*(slot);
2358             ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2359           }
2360         }
2361       }
2362     }
2363     ierr = PetscFree(values);CHKERRQ(ierr);
2364     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2365     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2366     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2367   }
2368   ierr = PetscFree(cols);CHKERRQ(ierr);
2369   PetscFunctionReturn(0);
2370 }
2371