xref: /petsc/src/dm/impls/da/fdda.c (revision fc8a9adeb7fcdc98711d755fa2dc544ddccf0f3e)
1 
2 #include <petsc/private/dmdaimpl.h> /*I      "petscdmda.h"     I*/
3 #include <petscmat.h>
4 
5 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring*);
6 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring*);
7 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring*);
8 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring*);
9 
10 /*
11    For ghost i that may be negative or greater than the upper bound this
12   maps it into the 0:m-1 range using periodicity
13 */
14 #define SetInRange(i,m) ((i < 0) ? m+i : ((i >= m) ? i-m : i))
15 
16 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill,PetscInt w,PetscInt **rfill)
17 {
18   PetscErrorCode ierr;
19   PetscInt       i,j,nz,*fill;
20 
21   PetscFunctionBegin;
22   if (!dfill) PetscFunctionReturn(0);
23 
24   /* count number nonzeros */
25   nz = 0;
26   for (i=0; i<w; i++) {
27     for (j=0; j<w; j++) {
28       if (dfill[w*i+j]) nz++;
29     }
30   }
31   ierr = PetscMalloc1(nz + w + 1,&fill);CHKERRQ(ierr);
32   /* construct modified CSR storage of nonzero structure */
33   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
34    so fill[1] - fill[0] gives number of nonzeros in first row etc */
35   nz = w + 1;
36   for (i=0; i<w; i++) {
37     fill[i] = nz;
38     for (j=0; j<w; j++) {
39       if (dfill[w*i+j]) {
40         fill[nz] = j;
41         nz++;
42       }
43     }
44   }
45   fill[w] = nz;
46 
47   *rfill = fill;
48   PetscFunctionReturn(0);
49 }
50 
51 
52 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse,PetscInt w,PetscInt **rfill)
53 {
54   PetscErrorCode ierr;
55   PetscInt       nz;
56 
57   PetscFunctionBegin;
58   if (!dfillsparse) PetscFunctionReturn(0);
59 
60   /* Determine number of non-zeros */
61   nz = (dfillsparse[w] - w - 1);
62 
63   /* Allocate space for our copy of the given sparse matrix representation. */
64   ierr = PetscMalloc1(nz + w + 1,rfill);CHKERRQ(ierr);
65   ierr = PetscMemcpy(*rfill,dfillsparse,(nz+w+1)*sizeof(PetscInt));CHKERRQ(ierr);
66   PetscFunctionReturn(0);
67 }
68 
69 
70 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
71 {
72   PetscErrorCode ierr;
73   PetscInt       i,k,cnt = 1;
74 
75   PetscFunctionBegin;
76 
77   /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of
78    columns to the left with any nonzeros in them plus 1 */
79   ierr = PetscCalloc1(dd->w,&dd->ofillcols);CHKERRQ(ierr);
80   for (i=0; i<dd->w; i++) {
81     for (k=dd->ofill[i]; k<dd->ofill[i+1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
82   }
83   for (i=0; i<dd->w; i++) {
84     if (dd->ofillcols[i]) {
85       dd->ofillcols[i] = cnt++;
86     }
87   }
88   PetscFunctionReturn(0);
89 }
90 
91 
92 
93 /*@
94     DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
95     of the matrix returned by DMCreateMatrix().
96 
97     Logically Collective on DMDA
98 
99     Input Parameter:
100 +   da - the distributed array
101 .   dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
102 -   ofill - the fill pattern in the off-diagonal blocks
103 
104 
105     Level: developer
106 
107     Notes:
108     This only makes sense when you are doing multicomponent problems but using the
109        MPIAIJ matrix format
110 
111            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
112        representing coupling and 0 entries for missing coupling. For example
113 $             dfill[9] = {1, 0, 0,
114 $                         1, 1, 0,
115 $                         0, 1, 1}
116        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
117        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
118        diagonal block).
119 
120      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
121      can be represented in the dfill, ofill format
122 
123    Contributed by Glenn Hammond
124 
125 .seealso DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()
126 
127 @*/
128 PetscErrorCode  DMDASetBlockFills(DM da,const PetscInt *dfill,const PetscInt *ofill)
129 {
130   DM_DA          *dd = (DM_DA*)da->data;
131   PetscErrorCode ierr;
132 
133   PetscFunctionBegin;
134   /* save the given dfill and ofill information */
135   ierr = DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);CHKERRQ(ierr);
136   ierr = DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);CHKERRQ(ierr);
137 
138   /* count nonzeros in ofill columns */
139   ierr = DMDASetBlockFills_Private2(dd);CHKERRQ(ierr);
140 
141   PetscFunctionReturn(0);
142 }
143 
144 
145 /*@
146     DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
147     of the matrix returned by DMCreateMatrix(), using sparse representations
148     of fill patterns.
149 
150     Logically Collective on DMDA
151 
152     Input Parameter:
153 +   da - the distributed array
154 .   dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block)
155 -   ofill - the sparse fill pattern in the off-diagonal blocks
156 
157 
158     Level: developer
159 
160     Notes: This only makes sense when you are doing multicomponent problems but using the
161        MPIAIJ matrix format
162 
163            The format for dfill and ofill is a sparse representation of a
164            dof-by-dof matrix with 1 entries representing coupling and 0 entries
165            for missing coupling.  The sparse representation is a 1 dimensional
166            array of length nz + dof + 1, where nz is the number of non-zeros in
167            the matrix.  The first dof entries in the array give the
168            starting array indices of each row's items in the rest of the array,
169            the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
170            and the remaining nz items give the column indices of each of
171            the 1s within the logical 2D matrix.  Each row's items within
172            the array are the column indices of the 1s within that row
173            of the 2D matrix.  PETSc developers may recognize that this is the
174            same format as that computed by the DMDASetBlockFills_Private()
175            function from a dense 2D matrix representation.
176 
177      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
178      can be represented in the dfill, ofill format
179 
180    Contributed by Philip C. Roth
181 
182 .seealso DMDASetBlockFills(), DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()
183 
184 @*/
185 PetscErrorCode  DMDASetBlockFillsSparse(DM da,const PetscInt *dfillsparse,const PetscInt *ofillsparse)
186 {
187   DM_DA          *dd = (DM_DA*)da->data;
188   PetscErrorCode ierr;
189 
190   PetscFunctionBegin;
191   /* save the given dfill and ofill information */
192   ierr = DMDASetBlockFillsSparse_Private(dfillsparse,dd->w,&dd->dfill);CHKERRQ(ierr);
193   ierr = DMDASetBlockFillsSparse_Private(ofillsparse,dd->w,&dd->ofill);CHKERRQ(ierr);
194 
195   /* count nonzeros in ofill columns */
196   ierr = DMDASetBlockFills_Private2(dd);CHKERRQ(ierr);
197 
198   PetscFunctionReturn(0);
199 }
200 
201 
202 PetscErrorCode  DMCreateColoring_DA(DM da,ISColoringType ctype,ISColoring *coloring)
203 {
204   PetscErrorCode   ierr;
205   PetscInt         dim,m,n,p,nc;
206   DMBoundaryType bx,by,bz;
207   MPI_Comm         comm;
208   PetscMPIInt      size;
209   PetscBool        isBAIJ;
210   DM_DA            *dd = (DM_DA*)da->data;
211 
212   PetscFunctionBegin;
213   /*
214                                   m
215           ------------------------------------------------------
216          |                                                     |
217          |                                                     |
218          |               ----------------------                |
219          |               |                    |                |
220       n  |           yn  |                    |                |
221          |               |                    |                |
222          |               .---------------------                |
223          |             (xs,ys)     xn                          |
224          |            .                                        |
225          |         (gxs,gys)                                   |
226          |                                                     |
227           -----------------------------------------------------
228   */
229 
230   /*
231          nc - number of components per grid point
232          col - number of colors needed in one direction for single component problem
233 
234   */
235   ierr = DMDAGetInfo(da,&dim,0,0,0,&m,&n,&p,&nc,0,&bx,&by,&bz,0);CHKERRQ(ierr);
236 
237   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
238   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
239   if (ctype == IS_COLORING_LOCAL) {
240     if (size == 1) {
241       ctype = IS_COLORING_GLOBAL;
242     } else if (dim > 1) {
243       if ((m==1 && bx == DM_BOUNDARY_PERIODIC) || (n==1 && by == DM_BOUNDARY_PERIODIC) || (p==1 && bz == DM_BOUNDARY_PERIODIC)) {
244         SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain  on the same process");
245       }
246     }
247   }
248 
249   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
250      matrices is for the blocks, not the individual matrix elements  */
251   ierr = PetscStrcmp(da->mattype,MATBAIJ,&isBAIJ);CHKERRQ(ierr);
252   if (!isBAIJ) {ierr = PetscStrcmp(da->mattype,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);}
253   if (!isBAIJ) {ierr = PetscStrcmp(da->mattype,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);}
254   if (isBAIJ) {
255     dd->w  = 1;
256     dd->xs = dd->xs/nc;
257     dd->xe = dd->xe/nc;
258     dd->Xs = dd->Xs/nc;
259     dd->Xe = dd->Xe/nc;
260   }
261 
262   /*
263      We do not provide a getcoloring function in the DMDA operations because
264    the basic DMDA does not know about matrices. We think of DMDA as being more
265    more low-level then matrices.
266   */
267   if (dim == 1) {
268     ierr = DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
269   } else if (dim == 2) {
270     ierr =  DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
271   } else if (dim == 3) {
272     ierr =  DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
273   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
274   if (isBAIJ) {
275     dd->w  = nc;
276     dd->xs = dd->xs*nc;
277     dd->xe = dd->xe*nc;
278     dd->Xs = dd->Xs*nc;
279     dd->Xe = dd->Xe*nc;
280   }
281   PetscFunctionReturn(0);
282 }
283 
284 /* ---------------------------------------------------------------------------------*/
285 
286 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
287 {
288   PetscErrorCode   ierr;
289   PetscInt         xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
290   PetscInt         ncolors;
291   MPI_Comm         comm;
292   DMBoundaryType bx,by;
293   DMDAStencilType  st;
294   ISColoringValue  *colors;
295   DM_DA            *dd = (DM_DA*)da->data;
296 
297   PetscFunctionBegin;
298   /*
299          nc - number of components per grid point
300          col - number of colors needed in one direction for single component problem
301 
302   */
303   ierr = DMDAGetInfo(da,&dim,&m,&n,0,&M,&N,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
304   col  = 2*s + 1;
305   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
306   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
307   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
308 
309   /* special case as taught to us by Paul Hovland */
310   if (st == DMDA_STENCIL_STAR && s == 1) {
311     ierr = DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
312   } else {
313 
314     if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X (%d) is divisible\n\
315                                                             by 2*stencil_width + 1 (%d)\n", m, col);
316     if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y (%d) is divisible\n\
317                                                             by 2*stencil_width + 1 (%d)\n", n, col);
318     if (ctype == IS_COLORING_GLOBAL) {
319       if (!dd->localcoloring) {
320         ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr);
321         ii   = 0;
322         for (j=ys; j<ys+ny; j++) {
323           for (i=xs; i<xs+nx; i++) {
324             for (k=0; k<nc; k++) {
325               colors[ii++] = k + nc*((i % col) + col*(j % col));
326             }
327           }
328         }
329         ncolors = nc + nc*(col-1 + col*(col-1));
330         ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
331       }
332       *coloring = dd->localcoloring;
333     } else if (ctype == IS_COLORING_LOCAL) {
334       if (!dd->ghostedcoloring) {
335         ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr);
336         ii   = 0;
337         for (j=gys; j<gys+gny; j++) {
338           for (i=gxs; i<gxs+gnx; i++) {
339             for (k=0; k<nc; k++) {
340               /* the complicated stuff is to handle periodic boundaries */
341               colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
342             }
343           }
344         }
345         ncolors = nc + nc*(col - 1 + col*(col-1));
346         ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
347         /* PetscIntView(ncolors,(PetscInt*)colors,0); */
348 
349         ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
350       }
351       *coloring = dd->ghostedcoloring;
352     } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
353   }
354   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
355   PetscFunctionReturn(0);
356 }
357 
358 /* ---------------------------------------------------------------------------------*/
359 
360 PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
361 {
362   PetscErrorCode   ierr;
363   PetscInt         xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
364   PetscInt         ncolors;
365   MPI_Comm         comm;
366   DMBoundaryType bx,by,bz;
367   DMDAStencilType  st;
368   ISColoringValue  *colors;
369   DM_DA            *dd = (DM_DA*)da->data;
370 
371   PetscFunctionBegin;
372   /*
373          nc - number of components per grid point
374          col - number of colors needed in one direction for single component problem
375 
376   */
377   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
378   col  = 2*s + 1;
379   if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
380                                                          by 2*stencil_width + 1\n");
381   if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
382                                                          by 2*stencil_width + 1\n");
383   if (bz == DM_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
384                                                          by 2*stencil_width + 1\n");
385 
386   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
387   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
388   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
389 
390   /* create the coloring */
391   if (ctype == IS_COLORING_GLOBAL) {
392     if (!dd->localcoloring) {
393       ierr = PetscMalloc1(nc*nx*ny*nz,&colors);CHKERRQ(ierr);
394       ii   = 0;
395       for (k=zs; k<zs+nz; k++) {
396         for (j=ys; j<ys+ny; j++) {
397           for (i=xs; i<xs+nx; i++) {
398             for (l=0; l<nc; l++) {
399               colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
400             }
401           }
402         }
403       }
404       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
405       ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
406     }
407     *coloring = dd->localcoloring;
408   } else if (ctype == IS_COLORING_LOCAL) {
409     if (!dd->ghostedcoloring) {
410       ierr = PetscMalloc1(nc*gnx*gny*gnz,&colors);CHKERRQ(ierr);
411       ii   = 0;
412       for (k=gzs; k<gzs+gnz; k++) {
413         for (j=gys; j<gys+gny; j++) {
414           for (i=gxs; i<gxs+gnx; i++) {
415             for (l=0; l<nc; l++) {
416               /* the complicated stuff is to handle periodic boundaries */
417               colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
418             }
419           }
420         }
421       }
422       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
423       ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
424       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
425     }
426     *coloring = dd->ghostedcoloring;
427   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
428   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
429   PetscFunctionReturn(0);
430 }
431 
432 /* ---------------------------------------------------------------------------------*/
433 
434 PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
435 {
436   PetscErrorCode   ierr;
437   PetscInt         xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
438   PetscInt         ncolors;
439   MPI_Comm         comm;
440   DMBoundaryType bx;
441   ISColoringValue  *colors;
442   DM_DA            *dd = (DM_DA*)da->data;
443 
444   PetscFunctionBegin;
445   /*
446          nc - number of components per grid point
447          col - number of colors needed in one direction for single component problem
448 
449   */
450   ierr = DMDAGetInfo(da,&dim,&m,0,0,&M,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
451   col  = 2*s + 1;
452 
453   if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points %d is divisible\n\
454                                                           by 2*stencil_width + 1 %d\n",(int)m,(int)col);
455 
456   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
457   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
458   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
459 
460   /* create the coloring */
461   if (ctype == IS_COLORING_GLOBAL) {
462     if (!dd->localcoloring) {
463       ierr = PetscMalloc1(nc*nx,&colors);CHKERRQ(ierr);
464       if (dd->ofillcols) {
465         PetscInt tc = 0;
466         for (i=0; i<nc; i++) tc += (PetscInt) (dd->ofillcols[i] > 0);
467         i1 = 0;
468         for (i=xs; i<xs+nx; i++) {
469           for (l=0; l<nc; l++) {
470             if (dd->ofillcols[l] && (i % col)) {
471               colors[i1++] =  nc - 1 + tc*((i % col) - 1) + dd->ofillcols[l];
472             } else {
473               colors[i1++] = l;
474             }
475           }
476         }
477         ncolors = nc + 2*s*tc;
478       } else {
479         i1 = 0;
480         for (i=xs; i<xs+nx; i++) {
481           for (l=0; l<nc; l++) {
482             colors[i1++] = l + nc*(i % col);
483           }
484         }
485         ncolors = nc + nc*(col-1);
486       }
487       ierr = ISColoringCreate(comm,ncolors,nc*nx,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
488     }
489     *coloring = dd->localcoloring;
490   } else if (ctype == IS_COLORING_LOCAL) {
491     if (!dd->ghostedcoloring) {
492       ierr = PetscMalloc1(nc*gnx,&colors);CHKERRQ(ierr);
493       i1   = 0;
494       for (i=gxs; i<gxs+gnx; i++) {
495         for (l=0; l<nc; l++) {
496           /* the complicated stuff is to handle periodic boundaries */
497           colors[i1++] = l + nc*(SetInRange(i,m) % col);
498         }
499       }
500       ncolors = nc + nc*(col-1);
501       ierr    = ISColoringCreate(comm,ncolors,nc*gnx,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
502       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
503     }
504     *coloring = dd->ghostedcoloring;
505   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
506   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
507   PetscFunctionReturn(0);
508 }
509 
510 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
511 {
512   PetscErrorCode   ierr;
513   PetscInt         xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
514   PetscInt         ncolors;
515   MPI_Comm         comm;
516   DMBoundaryType bx,by;
517   ISColoringValue  *colors;
518   DM_DA            *dd = (DM_DA*)da->data;
519 
520   PetscFunctionBegin;
521   /*
522          nc - number of components per grid point
523          col - number of colors needed in one direction for single component problem
524 
525   */
526   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,0);CHKERRQ(ierr);
527   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
528   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
529   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
530 
531   if (bx == DM_BOUNDARY_PERIODIC && (m % 5)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible by 5\n");
532   if (by == DM_BOUNDARY_PERIODIC && (n % 5)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible by 5\n");
533 
534   /* create the coloring */
535   if (ctype == IS_COLORING_GLOBAL) {
536     if (!dd->localcoloring) {
537       ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr);
538       ii   = 0;
539       for (j=ys; j<ys+ny; j++) {
540         for (i=xs; i<xs+nx; i++) {
541           for (k=0; k<nc; k++) {
542             colors[ii++] = k + nc*((3*j+i) % 5);
543           }
544         }
545       }
546       ncolors = 5*nc;
547       ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
548     }
549     *coloring = dd->localcoloring;
550   } else if (ctype == IS_COLORING_LOCAL) {
551     if (!dd->ghostedcoloring) {
552       ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr);
553       ii = 0;
554       for (j=gys; j<gys+gny; j++) {
555         for (i=gxs; i<gxs+gnx; i++) {
556           for (k=0; k<nc; k++) {
557             colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
558           }
559         }
560       }
561       ncolors = 5*nc;
562       ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
563       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
564     }
565     *coloring = dd->ghostedcoloring;
566   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
567   PetscFunctionReturn(0);
568 }
569 
570 /* =========================================================================== */
571 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat);
572 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM,Mat);
573 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat);
574 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat);
575 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat);
576 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat);
577 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat);
578 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat);
579 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat);
580 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat);
581 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM,Mat);
582 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM,Mat);
583 extern PetscErrorCode DMCreateMatrix_DA_IS(DM,Mat);
584 
585 /*@C
586    MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix
587 
588    Logically Collective on Mat
589 
590    Input Parameters:
591 +  mat - the matrix
592 -  da - the da
593 
594    Level: intermediate
595 
596 @*/
597 PetscErrorCode MatSetupDM(Mat mat,DM da)
598 {
599   PetscErrorCode ierr;
600 
601   PetscFunctionBegin;
602   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
603   PetscValidHeaderSpecificType(da,DM_CLASSID,1,DMDA);
604   ierr = PetscTryMethod(mat,"MatSetupDM_C",(Mat,DM),(mat,da));CHKERRQ(ierr);
605   PetscFunctionReturn(0);
606 }
607 
608 PetscErrorCode  MatView_MPI_DA(Mat A,PetscViewer viewer)
609 {
610   DM                da;
611   PetscErrorCode    ierr;
612   const char        *prefix;
613   Mat               Anatural;
614   AO                ao;
615   PetscInt          rstart,rend,*petsc,i;
616   IS                is;
617   MPI_Comm          comm;
618   PetscViewerFormat format;
619 
620   PetscFunctionBegin;
621   /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
622   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
623   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0);
624 
625   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
626   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
627   if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
628 
629   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
630   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
631   ierr = PetscMalloc1(rend-rstart,&petsc);CHKERRQ(ierr);
632   for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
633   ierr = AOApplicationToPetsc(ao,rend-rstart,petsc);CHKERRQ(ierr);
634   ierr = ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
635 
636   /* call viewer on natural ordering */
637   ierr = MatCreateSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);CHKERRQ(ierr);
638   ierr = ISDestroy(&is);CHKERRQ(ierr);
639   ierr = PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);CHKERRQ(ierr);
640   ierr = PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);CHKERRQ(ierr);
641   ierr = PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);CHKERRQ(ierr);
642   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
643   ierr = MatView(Anatural,viewer);CHKERRQ(ierr);
644   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
645   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
646   PetscFunctionReturn(0);
647 }
648 
649 PetscErrorCode  MatLoad_MPI_DA(Mat A,PetscViewer viewer)
650 {
651   DM             da;
652   PetscErrorCode ierr;
653   Mat            Anatural,Aapp;
654   AO             ao;
655   PetscInt       rstart,rend,*app,i,m,n,M,N;
656   IS             is;
657   MPI_Comm       comm;
658 
659   PetscFunctionBegin;
660   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
661   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
662   if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
663 
664   /* Load the matrix in natural ordering */
665   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Anatural);CHKERRQ(ierr);
666   ierr = MatSetType(Anatural,((PetscObject)A)->type_name);CHKERRQ(ierr);
667   ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr);
668   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
669   ierr = MatSetSizes(Anatural,m,n,M,N);CHKERRQ(ierr);
670   ierr = MatLoad(Anatural,viewer);CHKERRQ(ierr);
671 
672   /* Map natural ordering to application ordering and create IS */
673   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
674   ierr = MatGetOwnershipRange(Anatural,&rstart,&rend);CHKERRQ(ierr);
675   ierr = PetscMalloc1(rend-rstart,&app);CHKERRQ(ierr);
676   for (i=rstart; i<rend; i++) app[i-rstart] = i;
677   ierr = AOPetscToApplication(ao,rend-rstart,app);CHKERRQ(ierr);
678   ierr = ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
679 
680   /* Do permutation and replace header */
681   ierr = MatCreateSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);CHKERRQ(ierr);
682   ierr = MatHeaderReplace(A,&Aapp);CHKERRQ(ierr);
683   ierr = ISDestroy(&is);CHKERRQ(ierr);
684   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
685   PetscFunctionReturn(0);
686 }
687 
688 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
689 {
690   PetscErrorCode ierr;
691   PetscInt       dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
692   Mat            A;
693   MPI_Comm       comm;
694   MatType        Atype;
695   PetscSection   section, sectionGlobal;
696   void           (*aij)(void)=NULL,(*baij)(void)=NULL,(*sbaij)(void)=NULL,(*sell)(void)=NULL,(*is)(void)=NULL;
697   MatType        mtype;
698   PetscMPIInt    size;
699   DM_DA          *dd = (DM_DA*)da->data;
700 
701   PetscFunctionBegin;
702   ierr = MatInitializePackage();CHKERRQ(ierr);
703   mtype = da->mattype;
704 
705   ierr = DMGetSection(da, &section);CHKERRQ(ierr);
706   if (section) {
707     PetscInt  bs = -1;
708     PetscInt  localSize;
709     PetscBool isShell, isBlock, isSeqBlock, isMPIBlock, isSymBlock, isSymSeqBlock, isSymMPIBlock, isSymmetric;
710 
711     ierr = DMGetGlobalSection(da, &sectionGlobal);CHKERRQ(ierr);
712     ierr = PetscSectionGetConstrainedStorageSize(sectionGlobal, &localSize);CHKERRQ(ierr);
713     ierr = MatCreate(PetscObjectComm((PetscObject)da),&A);CHKERRQ(ierr);
714     ierr = MatSetSizes(A,localSize,localSize,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
715     ierr = MatSetType(A,mtype);CHKERRQ(ierr);
716     ierr = PetscStrcmp(mtype,MATSHELL,&isShell);CHKERRQ(ierr);
717     ierr = PetscStrcmp(mtype,MATBAIJ,&isBlock);CHKERRQ(ierr);
718     ierr = PetscStrcmp(mtype,MATSEQBAIJ,&isSeqBlock);CHKERRQ(ierr);
719     ierr = PetscStrcmp(mtype,MATMPIBAIJ,&isMPIBlock);CHKERRQ(ierr);
720     ierr = PetscStrcmp(mtype,MATSBAIJ,&isSymBlock);CHKERRQ(ierr);
721     ierr = PetscStrcmp(mtype,MATSEQSBAIJ,&isSymSeqBlock);CHKERRQ(ierr);
722     ierr = PetscStrcmp(mtype,MATMPISBAIJ,&isSymMPIBlock);CHKERRQ(ierr);
723     /* Check for symmetric storage */
724     isSymmetric = (PetscBool) (isSymBlock || isSymSeqBlock || isSymMPIBlock);
725     if (isSymmetric) {
726       ierr = MatSetOption(*J, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE);CHKERRQ(ierr);
727     }
728     if (!isShell) {
729       PetscInt *dnz, *onz, *dnzu, *onzu, bsLocal;
730 
731       if (bs < 0) {
732         if (isBlock || isSeqBlock || isMPIBlock || isSymBlock || isSymSeqBlock || isSymMPIBlock) {
733           PetscInt pStart, pEnd, p, dof;
734 
735           ierr = PetscSectionGetChart(sectionGlobal, &pStart, &pEnd);CHKERRQ(ierr);
736           for (p = pStart; p < pEnd; ++p) {
737             ierr = PetscSectionGetDof(sectionGlobal, p, &dof);CHKERRQ(ierr);
738             if (dof) {
739               bs = dof;
740               break;
741             }
742           }
743         } else {
744           bs = 1;
745         }
746         /* Must have same blocksize on all procs (some might have no points) */
747         bsLocal = bs;
748         ierr    = MPIU_Allreduce(&bsLocal, &bs, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)da));CHKERRQ(ierr);
749       }
750       ierr = PetscCalloc4(localSize/bs, &dnz, localSize/bs, &onz, localSize/bs, &dnzu, localSize/bs, &onzu);CHKERRQ(ierr);
751       /* ierr = DMPlexPreallocateOperator(dm, bs, section, sectionGlobal, dnz, onz, dnzu, onzu, *J, fillMatrix);CHKERRQ(ierr); */
752       ierr = PetscFree4(dnz, onz, dnzu, onzu);CHKERRQ(ierr);
753     }
754   }
755   /*
756                                   m
757           ------------------------------------------------------
758          |                                                     |
759          |                                                     |
760          |               ----------------------                |
761          |               |                    |                |
762       n  |           ny  |                    |                |
763          |               |                    |                |
764          |               .---------------------                |
765          |             (xs,ys)     nx                          |
766          |            .                                        |
767          |         (gxs,gys)                                   |
768          |                                                     |
769           -----------------------------------------------------
770   */
771 
772   /*
773          nc - number of components per grid point
774          col - number of colors needed in one direction for single component problem
775 
776   */
777   M   = dd->M;
778   N   = dd->N;
779   P   = dd->P;
780   dim = da->dim;
781   dof = dd->w;
782   /* ierr = DMDAGetInfo(da,&dim,&M,&N,&P,0,0,0,&dof,0,0,0,0,0);CHKERRQ(ierr); */
783   ierr = DMDAGetCorners(da,0,0,0,&nx,&ny,&nz);CHKERRQ(ierr);
784   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
785   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
786   ierr = MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);CHKERRQ(ierr);
787   ierr = MatSetType(A,mtype);CHKERRQ(ierr);
788   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
789   ierr = MatSetDM(A,da);CHKERRQ(ierr);
790   if (da->structure_only) {
791     ierr = MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE);CHKERRQ(ierr);
792   }
793   ierr = MatGetType(A,&Atype);CHKERRQ(ierr);
794   /*
795      We do not provide a getmatrix function in the DMDA operations because
796    the basic DMDA does not know about matrices. We think of DMDA as being more
797    more low-level than matrices. This is kind of cheating but, cause sometimes
798    we think of DMDA has higher level than matrices.
799 
800      We could switch based on Atype (or mtype), but we do not since the
801    specialized setting routines depend only on the particular preallocation
802    details of the matrix, not the type itself.
803   */
804   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
805   if (!aij) {
806     ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
807   }
808   if (!aij) {
809     ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
810     if (!baij) {
811       ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
812     }
813     if (!baij) {
814       ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
815       if (!sbaij) {
816         ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
817       }
818       if (!sbaij) {
819         ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISELLSetPreallocation_C",&sell);CHKERRQ(ierr);
820         if (!sell) {
821           ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",&sell);CHKERRQ(ierr);
822         }
823       }
824       if (!sell) {
825         ierr = PetscObjectQueryFunction((PetscObject)A,"MatISSetPreallocation_C",&is);CHKERRQ(ierr);
826       }
827     }
828   }
829   if (aij) {
830     if (dim == 1) {
831       if (dd->ofill) {
832         ierr = DMCreateMatrix_DA_1d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
833       } else {
834         ierr = DMCreateMatrix_DA_1d_MPIAIJ(da,A);CHKERRQ(ierr);
835       }
836     } else if (dim == 2) {
837       if (dd->ofill) {
838         ierr = DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
839       } else {
840         ierr = DMCreateMatrix_DA_2d_MPIAIJ(da,A);CHKERRQ(ierr);
841       }
842     } else if (dim == 3) {
843       if (dd->ofill) {
844         ierr = DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
845       } else {
846         ierr = DMCreateMatrix_DA_3d_MPIAIJ(da,A);CHKERRQ(ierr);
847       }
848     }
849   } else if (baij) {
850     if (dim == 2) {
851       ierr = DMCreateMatrix_DA_2d_MPIBAIJ(da,A);CHKERRQ(ierr);
852     } else if (dim == 3) {
853       ierr = DMCreateMatrix_DA_3d_MPIBAIJ(da,A);CHKERRQ(ierr);
854     } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
855   } else if (sbaij) {
856     if (dim == 2) {
857       ierr = DMCreateMatrix_DA_2d_MPISBAIJ(da,A);CHKERRQ(ierr);
858     } else if (dim == 3) {
859       ierr = DMCreateMatrix_DA_3d_MPISBAIJ(da,A);CHKERRQ(ierr);
860     } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
861   } else if (sell) {
862      if (dim == 2) {
863        ierr = DMCreateMatrix_DA_2d_MPISELL(da,A);CHKERRQ(ierr);
864      } else if (dim == 3) {
865        ierr = DMCreateMatrix_DA_3d_MPISELL(da,A);CHKERRQ(ierr);
866      } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
867   } else if (is) {
868     ierr = DMCreateMatrix_DA_IS(da,A);CHKERRQ(ierr);
869   } else {
870     ISLocalToGlobalMapping ltog;
871 
872     ierr = MatSetBlockSize(A,dof);CHKERRQ(ierr);
873     ierr = MatSetUp(A);CHKERRQ(ierr);
874     ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
875     ierr = MatSetLocalToGlobalMapping(A,ltog,ltog);CHKERRQ(ierr);
876   }
877   ierr = DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);CHKERRQ(ierr);
878   ierr = MatSetStencil(A,dim,dims,starts,dof);CHKERRQ(ierr);
879   ierr = MatSetDM(A,da);CHKERRQ(ierr);
880   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
881   if (size > 1) {
882     /* change viewer to display matrix in natural ordering */
883     ierr = MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA);CHKERRQ(ierr);
884     ierr = MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA);CHKERRQ(ierr);
885   }
886   *J = A;
887   PetscFunctionReturn(0);
888 }
889 
890 /* ---------------------------------------------------------------------------------*/
891 PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
892 
893 PetscErrorCode DMCreateMatrix_DA_IS(DM dm,Mat J)
894 {
895   DM_DA                  *da = (DM_DA*)dm->data;
896   Mat                    lJ;
897   ISLocalToGlobalMapping ltog;
898   IS                     is_loc_filt, is_glob;
899   const PetscInt         *e_loc,*idx;
900   PetscInt               nel,nen,nv,dof,dim,*gidx,nb;
901   PetscBool              flg;
902   PetscErrorCode         ierr;
903 
904   /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
905      We need to filter the local indices that are represented through the DMDAGetElements decomposition
906      This is because the size of the local matrices in MATIS is the local size of the l2g map */
907   PetscFunctionBegin;
908   dof  = da->w;
909   dim  = dm->dim;
910 
911   ierr = MatSetBlockSize(J,dof);CHKERRQ(ierr);
912 
913   /* get local elements indices in local DMDA numbering */
914   ierr = DMDAGetElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
915   ierr = ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nel*nen,e_loc,PETSC_COPY_VALUES,&is_loc_filt);CHKERRQ(ierr);
916   ierr = DMDARestoreElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr);
917 
918   /* obtain a consistent local ordering for MATIS */
919   ierr = ISSortRemoveDups(is_loc_filt);CHKERRQ(ierr);
920   ierr = ISBlockGetLocalSize(is_loc_filt,&nb);CHKERRQ(ierr);
921   ierr = DMGetLocalToGlobalMapping(dm,&ltog);CHKERRQ(ierr);
922   ierr = ISLocalToGlobalMappingGetSize(ltog,&nv);CHKERRQ(ierr);
923   ierr = PetscMalloc1(PetscMax(nb,nv/dof),&gidx);CHKERRQ(ierr);
924   ierr = ISBlockGetIndices(is_loc_filt,&idx);CHKERRQ(ierr);
925   ierr = ISLocalToGlobalMappingApplyBlock(ltog,nb,idx,gidx);CHKERRQ(ierr);
926   ierr = ISBlockRestoreIndices(is_loc_filt,&idx);CHKERRQ(ierr);
927   ierr = ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nb,gidx,PETSC_USE_POINTER,&is_glob);CHKERRQ(ierr);
928   ierr = ISLocalToGlobalMappingCreateIS(is_glob,&ltog);CHKERRQ(ierr);
929   ierr = ISDestroy(&is_glob);CHKERRQ(ierr);
930   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
931   ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
932 
933   /* We also attach a l2g map to the local matrices to have MatSetValueLocal to work */
934   ierr = MatISGetLocalMat(J,&lJ);CHKERRQ(ierr);
935   ierr = ISLocalToGlobalMappingCreateIS(is_loc_filt,&ltog);CHKERRQ(ierr);
936   ierr = ISDestroy(&is_loc_filt);CHKERRQ(ierr);
937   ierr = ISCreateStride(PetscObjectComm((PetscObject)lJ),nv/dof,0,1,&is_glob);CHKERRQ(ierr);
938   ierr = ISGetIndices(is_glob,&idx);CHKERRQ(ierr);
939   ierr = ISGlobalToLocalMappingApplyBlock(ltog,IS_GTOLM_MASK,nv/dof,idx,&nb,gidx);CHKERRQ(ierr);
940   ierr = ISRestoreIndices(is_glob,&idx);CHKERRQ(ierr);
941   ierr = ISDestroy(&is_glob);CHKERRQ(ierr);
942   ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
943   ierr = ISCreateBlock(PETSC_COMM_SELF,dof,nb,gidx,PETSC_USE_POINTER,&is_loc_filt);CHKERRQ(ierr);
944   ierr = ISLocalToGlobalMappingCreateIS(is_loc_filt,&ltog);CHKERRQ(ierr);
945   ierr = ISDestroy(&is_loc_filt);CHKERRQ(ierr);
946   ierr = MatSetLocalToGlobalMapping(lJ,ltog,ltog);CHKERRQ(ierr);
947   ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
948   ierr = PetscFree(gidx);CHKERRQ(ierr);
949 
950   /* Preallocation (not exact): we reuse the preallocation routines of the assembled version  */
951   flg = dm->prealloc_only;
952   dm->prealloc_only = PETSC_TRUE;
953   switch (dim) {
954   case 1:
955     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr);
956     ierr = DMCreateMatrix_DA_1d_MPIAIJ(dm,J);CHKERRQ(ierr);
957     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
958     break;
959   case 2:
960     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr);
961     ierr = DMCreateMatrix_DA_2d_MPIAIJ(dm,J);CHKERRQ(ierr);
962     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
963     break;
964   case 3:
965     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr);
966     ierr = DMCreateMatrix_DA_3d_MPIAIJ(dm,J);CHKERRQ(ierr);
967     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
968     break;
969   default:
970     SETERRQ1(PetscObjectComm((PetscObject)dm),PETSC_ERR_SUP,"Unhandled dimension %d",dim);
971     break;
972   }
973   dm->prealloc_only = flg;
974   PetscFunctionReturn(0);
975 }
976 
977 PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da,Mat J)
978 {
979   PetscErrorCode         ierr;
980   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p;
981   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
982   MPI_Comm               comm;
983   PetscScalar            *values;
984   DMBoundaryType         bx,by;
985   ISLocalToGlobalMapping ltog;
986   DMDAStencilType        st;
987 
988   PetscFunctionBegin;
989   /*
990          nc - number of components per grid point
991          col - number of colors needed in one direction for single component problem
992 
993   */
994   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
995   col  = 2*s + 1;
996   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
997   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
998   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
999 
1000   ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr);
1001   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1002 
1003   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1004   /* determine the matrix preallocation information */
1005   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
1006   for (i=xs; i<xs+nx; i++) {
1007 
1008     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1009     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1010 
1011     for (j=ys; j<ys+ny; j++) {
1012       slot = i - gxs + gnx*(j - gys);
1013 
1014       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1015       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1016 
1017       cnt = 0;
1018       for (k=0; k<nc; k++) {
1019         for (l=lstart; l<lend+1; l++) {
1020           for (p=pstart; p<pend+1; p++) {
1021             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1022               cols[cnt++] = k + nc*(slot + gnx*l + p);
1023             }
1024           }
1025         }
1026         rows[k] = k + nc*(slot);
1027       }
1028       ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1029     }
1030   }
1031   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1032   ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1033   ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1034   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1035 
1036   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1037 
1038   /*
1039     For each node in the grid: we get the neighbors in the local (on processor ordering
1040     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1041     PETSc ordering.
1042   */
1043   if (!da->prealloc_only) {
1044     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
1045     for (i=xs; i<xs+nx; i++) {
1046 
1047       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1048       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1049 
1050       for (j=ys; j<ys+ny; j++) {
1051         slot = i - gxs + gnx*(j - gys);
1052 
1053         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1054         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1055 
1056         cnt = 0;
1057         for (k=0; k<nc; k++) {
1058           for (l=lstart; l<lend+1; l++) {
1059             for (p=pstart; p<pend+1; p++) {
1060               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1061                 cols[cnt++] = k + nc*(slot + gnx*l + p);
1062               }
1063             }
1064           }
1065           rows[k] = k + nc*(slot);
1066         }
1067         ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1068       }
1069     }
1070     ierr = PetscFree(values);CHKERRQ(ierr);
1071     /* do not copy values to GPU since they are all zero and not yet needed there */
1072     ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1073     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1074     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1075     ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1076     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1077   }
1078   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1079   PetscFunctionReturn(0);
1080 }
1081 
1082 PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da,Mat J)
1083 {
1084   PetscErrorCode         ierr;
1085   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1086   PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
1087   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
1088   MPI_Comm               comm;
1089   PetscScalar            *values;
1090   DMBoundaryType         bx,by,bz;
1091   ISLocalToGlobalMapping ltog;
1092   DMDAStencilType        st;
1093 
1094   PetscFunctionBegin;
1095   /*
1096          nc - number of components per grid point
1097          col - number of colors needed in one direction for single component problem
1098 
1099   */
1100   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1101   col  = 2*s + 1;
1102   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1103   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1104   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1105 
1106   ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr);
1107   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1108 
1109   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1110   /* determine the matrix preallocation information */
1111   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1112   for (i=xs; i<xs+nx; i++) {
1113     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1114     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1115     for (j=ys; j<ys+ny; j++) {
1116       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1117       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1118       for (k=zs; k<zs+nz; k++) {
1119         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1120         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1121 
1122         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1123 
1124         cnt = 0;
1125         for (l=0; l<nc; l++) {
1126           for (ii=istart; ii<iend+1; ii++) {
1127             for (jj=jstart; jj<jend+1; jj++) {
1128               for (kk=kstart; kk<kend+1; kk++) {
1129                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1130                   cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1131                 }
1132               }
1133             }
1134           }
1135           rows[l] = l + nc*(slot);
1136         }
1137         ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1138       }
1139     }
1140   }
1141   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1142   ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1143   ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1144   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1145   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1146 
1147   /*
1148     For each node in the grid: we get the neighbors in the local (on processor ordering
1149     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1150     PETSc ordering.
1151   */
1152   if (!da->prealloc_only) {
1153     ierr = PetscCalloc1(col*col*col*nc*nc*nc,&values);CHKERRQ(ierr);
1154     for (i=xs; i<xs+nx; i++) {
1155       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1156       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1157       for (j=ys; j<ys+ny; j++) {
1158         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1159         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1160         for (k=zs; k<zs+nz; k++) {
1161           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1162           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1163 
1164           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1165 
1166           cnt = 0;
1167           for (l=0; l<nc; l++) {
1168             for (ii=istart; ii<iend+1; ii++) {
1169               for (jj=jstart; jj<jend+1; jj++) {
1170                 for (kk=kstart; kk<kend+1; kk++) {
1171                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1172                     cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1173                   }
1174                 }
1175               }
1176             }
1177             rows[l] = l + nc*(slot);
1178           }
1179           ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1180         }
1181       }
1182     }
1183     ierr = PetscFree(values);CHKERRQ(ierr);
1184     /* do not copy values to GPU since they are all zero and not yet needed there */
1185     ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1186     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1187     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1188     ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1189     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1190   }
1191   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1192   PetscFunctionReturn(0);
1193 }
1194 
1195 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J)
1196 {
1197   PetscErrorCode         ierr;
1198   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,M,N;
1199   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
1200   MPI_Comm               comm;
1201   PetscScalar            *values;
1202   DMBoundaryType         bx,by;
1203   ISLocalToGlobalMapping ltog,mltog;
1204   DMDAStencilType        st;
1205   PetscBool              removedups = PETSC_FALSE;
1206 
1207   PetscFunctionBegin;
1208   /*
1209          nc - number of components per grid point
1210          col - number of colors needed in one direction for single component problem
1211 
1212   */
1213   ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1214   col  = 2*s + 1;
1215   /*
1216        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1217        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1218   */
1219   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1220   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1221   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1222   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1223   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1224 
1225   ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr);
1226   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1227 
1228   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1229   /* determine the matrix preallocation information */
1230   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
1231   for (i=xs; i<xs+nx; i++) {
1232 
1233     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1234     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1235 
1236     for (j=ys; j<ys+ny; j++) {
1237       slot = i - gxs + gnx*(j - gys);
1238 
1239       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1240       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1241 
1242       cnt = 0;
1243       for (k=0; k<nc; k++) {
1244         for (l=lstart; l<lend+1; l++) {
1245           for (p=pstart; p<pend+1; p++) {
1246             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1247               cols[cnt++] = k + nc*(slot + gnx*l + p);
1248             }
1249           }
1250         }
1251         rows[k] = k + nc*(slot);
1252       }
1253       if (removedups) {
1254         ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1255       } else {
1256         ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1257       }
1258     }
1259   }
1260   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1261   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1262   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1263   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1264   ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
1265   if (!mltog) {
1266     ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1267   }
1268 
1269   /*
1270     For each node in the grid: we get the neighbors in the local (on processor ordering
1271     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1272     PETSc ordering.
1273   */
1274   if (!da->prealloc_only) {
1275     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
1276     for (i=xs; i<xs+nx; i++) {
1277 
1278       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1279       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1280 
1281       for (j=ys; j<ys+ny; j++) {
1282         slot = i - gxs + gnx*(j - gys);
1283 
1284         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1285         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1286 
1287         cnt = 0;
1288         for (k=0; k<nc; k++) {
1289           for (l=lstart; l<lend+1; l++) {
1290             for (p=pstart; p<pend+1; p++) {
1291               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1292                 cols[cnt++] = k + nc*(slot + gnx*l + p);
1293               }
1294             }
1295           }
1296           rows[k] = k + nc*(slot);
1297         }
1298         ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1299       }
1300     }
1301     ierr = PetscFree(values);CHKERRQ(ierr);
1302     /* do not copy values to GPU since they are all zero and not yet needed there */
1303     ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1304     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1305     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1306     ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1307     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1308   }
1309   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1310   PetscFunctionReturn(0);
1311 }
1312 
1313 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)
1314 {
1315   PetscErrorCode         ierr;
1316   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1317   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,maxcnt = 0,l,p,M,N;
1318   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
1319   DM_DA                  *dd = (DM_DA*)da->data;
1320   PetscInt               ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
1321   MPI_Comm               comm;
1322   PetscScalar            *values;
1323   DMBoundaryType         bx,by;
1324   ISLocalToGlobalMapping ltog;
1325   DMDAStencilType        st;
1326   PetscBool              removedups = PETSC_FALSE;
1327 
1328   PetscFunctionBegin;
1329   /*
1330          nc - number of components per grid point
1331          col - number of colors needed in one direction for single component problem
1332 
1333   */
1334   ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1335   col  = 2*s + 1;
1336   /*
1337        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1338        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1339   */
1340   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1341   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1342   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1343   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1344   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1345 
1346   ierr = PetscMalloc1(col*col*nc,&cols);CHKERRQ(ierr);
1347   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1348 
1349   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1350   /* determine the matrix preallocation information */
1351   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
1352   for (i=xs; i<xs+nx; i++) {
1353 
1354     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1355     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1356 
1357     for (j=ys; j<ys+ny; j++) {
1358       slot = i - gxs + gnx*(j - gys);
1359 
1360       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1361       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1362 
1363       for (k=0; k<nc; k++) {
1364         cnt = 0;
1365         for (l=lstart; l<lend+1; l++) {
1366           for (p=pstart; p<pend+1; p++) {
1367             if (l || p) {
1368               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
1369                 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
1370               }
1371             } else {
1372               if (dfill) {
1373                 for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1374               } else {
1375                 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1376               }
1377             }
1378           }
1379         }
1380         row    = k + nc*(slot);
1381         maxcnt = PetscMax(maxcnt,cnt);
1382         if (removedups) {
1383           ierr   = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1384         } else {
1385           ierr   = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1386         }
1387       }
1388     }
1389   }
1390   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1391   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1392   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1393   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1394 
1395   /*
1396     For each node in the grid: we get the neighbors in the local (on processor ordering
1397     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1398     PETSc ordering.
1399   */
1400   if (!da->prealloc_only) {
1401     ierr = PetscCalloc1(maxcnt,&values);CHKERRQ(ierr);
1402     for (i=xs; i<xs+nx; i++) {
1403 
1404       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1405       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1406 
1407       for (j=ys; j<ys+ny; j++) {
1408         slot = i - gxs + gnx*(j - gys);
1409 
1410         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1411         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1412 
1413         for (k=0; k<nc; k++) {
1414           cnt = 0;
1415           for (l=lstart; l<lend+1; l++) {
1416             for (p=pstart; p<pend+1; p++) {
1417               if (l || p) {
1418                 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
1419                   for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
1420                 }
1421               } else {
1422                 if (dfill) {
1423                   for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1424                 } else {
1425                   for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1426                 }
1427               }
1428             }
1429           }
1430           row  = k + nc*(slot);
1431           ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1432         }
1433       }
1434     }
1435     ierr = PetscFree(values);CHKERRQ(ierr);
1436     /* do not copy values to GPU since they are all zero and not yet needed there */
1437     ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1438     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1439     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1440     ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1441     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1442   }
1443   ierr = PetscFree(cols);CHKERRQ(ierr);
1444   PetscFunctionReturn(0);
1445 }
1446 
1447 /* ---------------------------------------------------------------------------------*/
1448 
1449 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J)
1450 {
1451   PetscErrorCode         ierr;
1452   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1453   PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
1454   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
1455   MPI_Comm               comm;
1456   PetscScalar            *values;
1457   DMBoundaryType         bx,by,bz;
1458   ISLocalToGlobalMapping ltog,mltog;
1459   DMDAStencilType        st;
1460   PetscBool              removedups = PETSC_FALSE;
1461 
1462   PetscFunctionBegin;
1463   /*
1464          nc - number of components per grid point
1465          col - number of colors needed in one direction for single component problem
1466 
1467   */
1468   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1469   col  = 2*s + 1;
1470 
1471   /*
1472        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1473        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1474   */
1475   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1476   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1477   if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;
1478 
1479   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1480   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1481   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1482 
1483   ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr);
1484   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1485 
1486   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1487   /* determine the matrix preallocation information */
1488   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1489   for (i=xs; i<xs+nx; i++) {
1490     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1491     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1492     for (j=ys; j<ys+ny; j++) {
1493       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1494       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1495       for (k=zs; k<zs+nz; k++) {
1496         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1497         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1498 
1499         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1500 
1501         cnt = 0;
1502         for (l=0; l<nc; l++) {
1503           for (ii=istart; ii<iend+1; ii++) {
1504             for (jj=jstart; jj<jend+1; jj++) {
1505               for (kk=kstart; kk<kend+1; kk++) {
1506                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1507                   cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1508                 }
1509               }
1510             }
1511           }
1512           rows[l] = l + nc*(slot);
1513         }
1514         if (removedups) {
1515           ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1516         } else {
1517           ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1518         }
1519       }
1520     }
1521   }
1522   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1523   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1524   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1525   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1526   ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
1527   if (!mltog) {
1528     ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1529   }
1530 
1531   /*
1532     For each node in the grid: we get the neighbors in the local (on processor ordering
1533     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1534     PETSc ordering.
1535   */
1536   if (!da->prealloc_only) {
1537     ierr = PetscCalloc1(col*col*col*nc*nc*nc,&values);CHKERRQ(ierr);
1538     for (i=xs; i<xs+nx; i++) {
1539       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1540       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1541       for (j=ys; j<ys+ny; j++) {
1542         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1543         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1544         for (k=zs; k<zs+nz; k++) {
1545           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1546           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1547 
1548           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1549 
1550           cnt = 0;
1551           for (l=0; l<nc; l++) {
1552             for (ii=istart; ii<iend+1; ii++) {
1553               for (jj=jstart; jj<jend+1; jj++) {
1554                 for (kk=kstart; kk<kend+1; kk++) {
1555                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1556                     cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1557                   }
1558                 }
1559               }
1560             }
1561             rows[l] = l + nc*(slot);
1562           }
1563           ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1564         }
1565       }
1566     }
1567     ierr = PetscFree(values);CHKERRQ(ierr);
1568     /* do not copy values to GPU since they are all zero and not yet needed there */
1569     ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1570     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1571     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1572     ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1573     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1574   }
1575   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1576   PetscFunctionReturn(0);
1577 }
1578 
1579 /* ---------------------------------------------------------------------------------*/
1580 
1581 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J)
1582 {
1583   PetscErrorCode         ierr;
1584   DM_DA                  *dd = (DM_DA*)da->data;
1585   PetscInt               xs,nx,i,j,gxs,gnx,row,k,l;
1586   PetscInt               m,dim,s,*cols = NULL,nc,cnt,maxcnt = 0,*ocols;
1587   PetscInt               *ofill = dd->ofill,*dfill = dd->dfill;
1588   PetscScalar            *values;
1589   DMBoundaryType         bx;
1590   ISLocalToGlobalMapping ltog;
1591   PetscMPIInt            rank,size;
1592 
1593   PetscFunctionBegin;
1594   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)da),&rank);CHKERRQ(ierr);
1595   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRQ(ierr);
1596 
1597   /*
1598          nc - number of components per grid point
1599 
1600   */
1601   ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
1602   if (s > 1) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Matrix creation for 1d not implemented correctly for stencil width larger than 1");
1603   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
1604   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
1605 
1606   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1607   ierr = PetscCalloc2(nx*nc,&cols,nx*nc,&ocols);CHKERRQ(ierr);
1608 
1609   /*
1610         note should be smaller for first and last process with no periodic
1611         does not handle dfill
1612   */
1613   cnt = 0;
1614   /* coupling with process to the left */
1615   for (i=0; i<s; i++) {
1616     for (j=0; j<nc; j++) {
1617       ocols[cnt] = ((!rank) ? 0 : (s - i)*(ofill[j+1] - ofill[j]));
1618       cols[cnt]  = dfill[j+1] - dfill[j] + (s + i)*(ofill[j+1] - ofill[j]);
1619       if (!rank && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1620         if (size > 1) ocols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
1621         else cols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
1622       }
1623       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1624       cnt++;
1625     }
1626   }
1627   for (i=s; i<nx-s; i++) {
1628     for (j=0; j<nc; j++) {
1629       cols[cnt] = dfill[j+1] - dfill[j] + 2*s*(ofill[j+1] - ofill[j]);
1630       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1631       cnt++;
1632     }
1633   }
1634   /* coupling with process to the right */
1635   for (i=nx-s; i<nx; i++) {
1636     for (j=0; j<nc; j++) {
1637       ocols[cnt] = ((rank == (size-1)) ? 0 : (i - nx + s + 1)*(ofill[j+1] - ofill[j]));
1638       cols[cnt]  = dfill[j+1] - dfill[j] + (s + nx - i - 1)*(ofill[j+1] - ofill[j]);
1639       if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1640         if (size > 1) ocols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
1641         else cols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
1642       }
1643       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1644       cnt++;
1645     }
1646   }
1647 
1648   ierr = MatSeqAIJSetPreallocation(J,0,cols);CHKERRQ(ierr);
1649   ierr = MatMPIAIJSetPreallocation(J,0,cols,0,ocols);CHKERRQ(ierr);
1650   ierr = PetscFree2(cols,ocols);CHKERRQ(ierr);
1651 
1652   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1653   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1654 
1655   /*
1656     For each node in the grid: we get the neighbors in the local (on processor ordering
1657     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1658     PETSc ordering.
1659   */
1660   if (!da->prealloc_only) {
1661     ierr = PetscCalloc2(maxcnt,&values,maxcnt,&cols);CHKERRQ(ierr);
1662 
1663     row = xs*nc;
1664     /* coupling with process to the left */
1665     for (i=xs; i<xs+s; i++) {
1666       for (j=0; j<nc; j++) {
1667         cnt = 0;
1668         if (rank) {
1669           for (l=0; l<s; l++) {
1670             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1671           }
1672         }
1673         if (!rank && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1674           for (l=0; l<s; l++) {
1675             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (m + i - s - l)*nc + ofill[k];
1676           }
1677         }
1678         if (dfill) {
1679           for (k=dfill[j]; k<dfill[j+1]; k++) {
1680             cols[cnt++] = i*nc + dfill[k];
1681           }
1682         } else {
1683           for (k=0; k<nc; k++) {
1684             cols[cnt++] = i*nc + k;
1685           }
1686         }
1687         for (l=0; l<s; l++) {
1688           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1689         }
1690         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1691         row++;
1692       }
1693     }
1694     for (i=xs+s; i<xs+nx-s; i++) {
1695       for (j=0; j<nc; j++) {
1696         cnt = 0;
1697         for (l=0; l<s; l++) {
1698           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1699         }
1700         if (dfill) {
1701           for (k=dfill[j]; k<dfill[j+1]; k++) {
1702             cols[cnt++] = i*nc + dfill[k];
1703           }
1704         } else {
1705           for (k=0; k<nc; k++) {
1706             cols[cnt++] = i*nc + k;
1707           }
1708         }
1709         for (l=0; l<s; l++) {
1710           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1711         }
1712         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1713         row++;
1714       }
1715     }
1716     /* coupling with process to the right */
1717     for (i=xs+nx-s; i<xs+nx; i++) {
1718       for (j=0; j<nc; j++) {
1719         cnt = 0;
1720         for (l=0; l<s; l++) {
1721           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1722         }
1723         if (dfill) {
1724           for (k=dfill[j]; k<dfill[j+1]; k++) {
1725             cols[cnt++] = i*nc + dfill[k];
1726           }
1727         } else {
1728           for (k=0; k<nc; k++) {
1729             cols[cnt++] = i*nc + k;
1730           }
1731         }
1732         if (rank < size-1) {
1733           for (l=0; l<s; l++) {
1734             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1735           }
1736         }
1737         if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1738           for (l=0; l<s; l++) {
1739             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s - l - m + 2)*nc + ofill[k];
1740           }
1741         }
1742         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1743         row++;
1744       }
1745     }
1746     ierr = PetscFree2(values,cols);CHKERRQ(ierr);
1747     /* do not copy values to GPU since they are all zero and not yet needed there */
1748     ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1749     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1750     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1751     ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1752     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1753   }
1754   PetscFunctionReturn(0);
1755 }
1756 
1757 /* ---------------------------------------------------------------------------------*/
1758 
1759 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J)
1760 {
1761   PetscErrorCode         ierr;
1762   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1763   PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
1764   PetscInt               istart,iend;
1765   PetscScalar            *values;
1766   DMBoundaryType         bx;
1767   ISLocalToGlobalMapping ltog,mltog;
1768 
1769   PetscFunctionBegin;
1770   /*
1771          nc - number of components per grid point
1772          col - number of colors needed in one direction for single component problem
1773 
1774   */
1775   ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
1776   col  = 2*s + 1;
1777 
1778   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
1779   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
1780 
1781   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1782   ierr = MatSeqAIJSetPreallocation(J,col*nc,0);CHKERRQ(ierr);
1783   ierr = MatMPIAIJSetPreallocation(J,col*nc,0,col*nc,0);CHKERRQ(ierr);
1784 
1785   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1786   ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
1787   if (!mltog) {
1788     ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1789   }
1790 
1791   /*
1792     For each node in the grid: we get the neighbors in the local (on processor ordering
1793     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1794     PETSc ordering.
1795   */
1796   if (!da->prealloc_only) {
1797     ierr = PetscMalloc2(nc,&rows,col*nc*nc,&cols);CHKERRQ(ierr);
1798     ierr = PetscCalloc1(col*nc*nc,&values);CHKERRQ(ierr);
1799     for (i=xs; i<xs+nx; i++) {
1800       istart = PetscMax(-s,gxs - i);
1801       iend   = PetscMin(s,gxs + gnx - i - 1);
1802       slot   = i - gxs;
1803 
1804       cnt = 0;
1805       for (l=0; l<nc; l++) {
1806         for (i1=istart; i1<iend+1; i1++) {
1807           cols[cnt++] = l + nc*(slot + i1);
1808         }
1809         rows[l] = l + nc*(slot);
1810       }
1811       ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1812     }
1813     ierr = PetscFree(values);CHKERRQ(ierr);
1814     /* do not copy values to GPU since they are all zero and not yet needed there */
1815     ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1816     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1817     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1818     ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1819     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1820     ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1821   }
1822   PetscFunctionReturn(0);
1823 }
1824 
1825 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
1826 {
1827   PetscErrorCode         ierr;
1828   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1829   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1830   PetscInt               istart,iend,jstart,jend,ii,jj;
1831   MPI_Comm               comm;
1832   PetscScalar            *values;
1833   DMBoundaryType         bx,by;
1834   DMDAStencilType        st;
1835   ISLocalToGlobalMapping ltog;
1836 
1837   PetscFunctionBegin;
1838   /*
1839      nc - number of components per grid point
1840      col - number of colors needed in one direction for single component problem
1841   */
1842   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1843   col  = 2*s + 1;
1844 
1845   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1846   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1847   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1848 
1849   ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr);
1850 
1851   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1852 
1853   /* determine the matrix preallocation information */
1854   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1855   for (i=xs; i<xs+nx; i++) {
1856     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1857     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1858     for (j=ys; j<ys+ny; j++) {
1859       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1860       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1861       slot   = i - gxs + gnx*(j - gys);
1862 
1863       /* Find block columns in block row */
1864       cnt = 0;
1865       for (ii=istart; ii<iend+1; ii++) {
1866         for (jj=jstart; jj<jend+1; jj++) {
1867           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1868             cols[cnt++] = slot + ii + gnx*jj;
1869           }
1870         }
1871       }
1872       ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1873     }
1874   }
1875   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1876   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1877   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1878 
1879   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1880 
1881   /*
1882     For each node in the grid: we get the neighbors in the local (on processor ordering
1883     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1884     PETSc ordering.
1885   */
1886   if (!da->prealloc_only) {
1887     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
1888     for (i=xs; i<xs+nx; i++) {
1889       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1890       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1891       for (j=ys; j<ys+ny; j++) {
1892         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1893         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1894         slot = i - gxs + gnx*(j - gys);
1895         cnt  = 0;
1896         for (ii=istart; ii<iend+1; ii++) {
1897           for (jj=jstart; jj<jend+1; jj++) {
1898             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1899               cols[cnt++] = slot + ii + gnx*jj;
1900             }
1901           }
1902         }
1903         ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1904       }
1905     }
1906     ierr = PetscFree(values);CHKERRQ(ierr);
1907     /* do not copy values to GPU since they are all zero and not yet needed there */
1908     ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1909     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1910     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1911     ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1912     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1913   }
1914   ierr = PetscFree(cols);CHKERRQ(ierr);
1915   PetscFunctionReturn(0);
1916 }
1917 
1918 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
1919 {
1920   PetscErrorCode         ierr;
1921   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1922   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1923   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1924   MPI_Comm               comm;
1925   PetscScalar            *values;
1926   DMBoundaryType         bx,by,bz;
1927   DMDAStencilType        st;
1928   ISLocalToGlobalMapping ltog;
1929 
1930   PetscFunctionBegin;
1931   /*
1932          nc - number of components per grid point
1933          col - number of colors needed in one direction for single component problem
1934 
1935   */
1936   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1937   col  = 2*s + 1;
1938 
1939   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1940   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1941   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1942 
1943   ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr);
1944 
1945   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1946 
1947   /* determine the matrix preallocation information */
1948   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1949   for (i=xs; i<xs+nx; i++) {
1950     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1951     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1952     for (j=ys; j<ys+ny; j++) {
1953       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1954       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1955       for (k=zs; k<zs+nz; k++) {
1956         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1957         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1958 
1959         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1960 
1961         /* Find block columns in block row */
1962         cnt = 0;
1963         for (ii=istart; ii<iend+1; ii++) {
1964           for (jj=jstart; jj<jend+1; jj++) {
1965             for (kk=kstart; kk<kend+1; kk++) {
1966               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1967                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1968               }
1969             }
1970           }
1971         }
1972         ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1973       }
1974     }
1975   }
1976   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1977   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1978   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1979 
1980   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1981 
1982   /*
1983     For each node in the grid: we get the neighbors in the local (on processor ordering
1984     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1985     PETSc ordering.
1986   */
1987   if (!da->prealloc_only) {
1988     ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr);
1989     for (i=xs; i<xs+nx; i++) {
1990       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1991       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1992       for (j=ys; j<ys+ny; j++) {
1993         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1994         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1995         for (k=zs; k<zs+nz; k++) {
1996           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1997           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1998 
1999           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2000 
2001           cnt = 0;
2002           for (ii=istart; ii<iend+1; ii++) {
2003             for (jj=jstart; jj<jend+1; jj++) {
2004               for (kk=kstart; kk<kend+1; kk++) {
2005                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2006                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
2007                 }
2008               }
2009             }
2010           }
2011           ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2012         }
2013       }
2014     }
2015     ierr = PetscFree(values);CHKERRQ(ierr);
2016     /* do not copy values to GPU since they are all zero and not yet needed there */
2017     ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
2018     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2019     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2020     ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
2021     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2022   }
2023   ierr = PetscFree(cols);CHKERRQ(ierr);
2024   PetscFunctionReturn(0);
2025 }
2026 
2027 /*
2028   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
2029   identify in the local ordering with periodic domain.
2030 */
2031 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[])
2032 {
2033   PetscErrorCode ierr;
2034   PetscInt       i,n;
2035 
2036   PetscFunctionBegin;
2037   ierr = ISLocalToGlobalMappingApplyBlock(ltog,1,row,row);CHKERRQ(ierr);
2038   ierr = ISLocalToGlobalMappingApplyBlock(ltog,*cnt,col,col);CHKERRQ(ierr);
2039   for (i=0,n=0; i<*cnt; i++) {
2040     if (col[i] >= *row) col[n++] = col[i];
2041   }
2042   *cnt = n;
2043   PetscFunctionReturn(0);
2044 }
2045 
2046 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J)
2047 {
2048   PetscErrorCode         ierr;
2049   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2050   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
2051   PetscInt               istart,iend,jstart,jend,ii,jj;
2052   MPI_Comm               comm;
2053   PetscScalar            *values;
2054   DMBoundaryType         bx,by;
2055   DMDAStencilType        st;
2056   ISLocalToGlobalMapping ltog;
2057 
2058   PetscFunctionBegin;
2059   /*
2060      nc - number of components per grid point
2061      col - number of colors needed in one direction for single component problem
2062   */
2063   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
2064   col  = 2*s + 1;
2065 
2066   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
2067   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
2068   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
2069 
2070   ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr);
2071 
2072   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
2073 
2074   /* determine the matrix preallocation information */
2075   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
2076   for (i=xs; i<xs+nx; i++) {
2077     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2078     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2079     for (j=ys; j<ys+ny; j++) {
2080       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2081       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2082       slot   = i - gxs + gnx*(j - gys);
2083 
2084       /* Find block columns in block row */
2085       cnt = 0;
2086       for (ii=istart; ii<iend+1; ii++) {
2087         for (jj=jstart; jj<jend+1; jj++) {
2088           if (st == DMDA_STENCIL_BOX || !ii || !jj) {
2089             cols[cnt++] = slot + ii + gnx*jj;
2090           }
2091         }
2092       }
2093       ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2094       ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
2095     }
2096   }
2097   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
2098   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
2099   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2100 
2101   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
2102 
2103   /*
2104     For each node in the grid: we get the neighbors in the local (on processor ordering
2105     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2106     PETSc ordering.
2107   */
2108   if (!da->prealloc_only) {
2109     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
2110     for (i=xs; i<xs+nx; i++) {
2111       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2112       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2113       for (j=ys; j<ys+ny; j++) {
2114         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2115         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2116         slot   = i - gxs + gnx*(j - gys);
2117 
2118         /* Find block columns in block row */
2119         cnt = 0;
2120         for (ii=istart; ii<iend+1; ii++) {
2121           for (jj=jstart; jj<jend+1; jj++) {
2122             if (st == DMDA_STENCIL_BOX || !ii || !jj) {
2123               cols[cnt++] = slot + ii + gnx*jj;
2124             }
2125           }
2126         }
2127         ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2128         ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2129       }
2130     }
2131     ierr = PetscFree(values);CHKERRQ(ierr);
2132     /* do not copy values to GPU since they are all zero and not yet needed there */
2133     ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
2134     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2135     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2136     ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
2137     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2138   }
2139   ierr = PetscFree(cols);CHKERRQ(ierr);
2140   PetscFunctionReturn(0);
2141 }
2142 
2143 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J)
2144 {
2145   PetscErrorCode         ierr;
2146   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2147   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
2148   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
2149   MPI_Comm               comm;
2150   PetscScalar            *values;
2151   DMBoundaryType         bx,by,bz;
2152   DMDAStencilType        st;
2153   ISLocalToGlobalMapping ltog;
2154 
2155   PetscFunctionBegin;
2156   /*
2157      nc - number of components per grid point
2158      col - number of colors needed in one direction for single component problem
2159   */
2160   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
2161   col  = 2*s + 1;
2162 
2163   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
2164   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
2165   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
2166 
2167   /* create the matrix */
2168   ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr);
2169 
2170   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
2171 
2172   /* determine the matrix preallocation information */
2173   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
2174   for (i=xs; i<xs+nx; i++) {
2175     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2176     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2177     for (j=ys; j<ys+ny; j++) {
2178       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2179       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2180       for (k=zs; k<zs+nz; k++) {
2181         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2182         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2183 
2184         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2185 
2186         /* Find block columns in block row */
2187         cnt = 0;
2188         for (ii=istart; ii<iend+1; ii++) {
2189           for (jj=jstart; jj<jend+1; jj++) {
2190             for (kk=kstart; kk<kend+1; kk++) {
2191               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
2192                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
2193               }
2194             }
2195           }
2196         }
2197         ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2198         ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
2199       }
2200     }
2201   }
2202   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
2203   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
2204   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2205 
2206   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
2207 
2208   /*
2209     For each node in the grid: we get the neighbors in the local (on processor ordering
2210     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2211     PETSc ordering.
2212   */
2213   if (!da->prealloc_only) {
2214     ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr);
2215     for (i=xs; i<xs+nx; i++) {
2216       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2217       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2218       for (j=ys; j<ys+ny; j++) {
2219         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2220         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2221         for (k=zs; k<zs+nz; k++) {
2222           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2223           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2224 
2225           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2226 
2227           cnt = 0;
2228           for (ii=istart; ii<iend+1; ii++) {
2229             for (jj=jstart; jj<jend+1; jj++) {
2230               for (kk=kstart; kk<kend+1; kk++) {
2231                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
2232                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
2233                 }
2234               }
2235             }
2236           }
2237           ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2238           ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2239         }
2240       }
2241     }
2242     ierr = PetscFree(values);CHKERRQ(ierr);
2243     /* do not copy values to GPU since they are all zero and not yet needed there */
2244     ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
2245     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2246     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2247     ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
2248     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2249   }
2250   ierr = PetscFree(cols);CHKERRQ(ierr);
2251   PetscFunctionReturn(0);
2252 }
2253 
2254 /* ---------------------------------------------------------------------------------*/
2255 
2256 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J)
2257 {
2258   PetscErrorCode         ierr;
2259   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2260   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt, maxcnt = 0,l,p,*dnz,*onz;
2261   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
2262   DM_DA                  *dd = (DM_DA*)da->data;
2263   PetscInt               ifill_col,*dfill = dd->dfill,*ofill = dd->ofill;
2264   MPI_Comm               comm;
2265   PetscScalar            *values;
2266   DMBoundaryType         bx,by,bz;
2267   ISLocalToGlobalMapping ltog;
2268   DMDAStencilType        st;
2269   PetscBool              removedups = PETSC_FALSE;
2270 
2271   PetscFunctionBegin;
2272   /*
2273          nc - number of components per grid point
2274          col - number of colors needed in one direction for single component problem
2275 
2276   */
2277   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
2278   col  = 2*s + 1;
2279   if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
2280                  by 2*stencil_width + 1\n");
2281   if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
2282                  by 2*stencil_width + 1\n");
2283   if (bz == DM_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
2284                  by 2*stencil_width + 1\n");
2285 
2286   /*
2287        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
2288        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
2289   */
2290   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
2291   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
2292   if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;
2293 
2294   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
2295   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
2296   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
2297 
2298   ierr = PetscMalloc1(col*col*col*nc,&cols);CHKERRQ(ierr);
2299   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
2300 
2301   /* determine the matrix preallocation information */
2302   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
2303 
2304   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
2305   for (i=xs; i<xs+nx; i++) {
2306     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2307     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2308     for (j=ys; j<ys+ny; j++) {
2309       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2310       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2311       for (k=zs; k<zs+nz; k++) {
2312         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2313         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2314 
2315         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2316 
2317         for (l=0; l<nc; l++) {
2318           cnt = 0;
2319           for (ii=istart; ii<iend+1; ii++) {
2320             for (jj=jstart; jj<jend+1; jj++) {
2321               for (kk=kstart; kk<kend+1; kk++) {
2322                 if (ii || jj || kk) {
2323                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2324                     for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2325                   }
2326                 } else {
2327                   if (dfill) {
2328                     for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2329                   } else {
2330                     for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2331                   }
2332                 }
2333               }
2334             }
2335           }
2336           row  = l + nc*(slot);
2337           maxcnt = PetscMax(maxcnt,cnt);
2338           if (removedups) {
2339             ierr = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
2340           } else {
2341             ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
2342           }
2343         }
2344       }
2345     }
2346   }
2347   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
2348   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
2349   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2350   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
2351 
2352   /*
2353     For each node in the grid: we get the neighbors in the local (on processor ordering
2354     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2355     PETSc ordering.
2356   */
2357   if (!da->prealloc_only) {
2358     ierr = PetscCalloc1(maxcnt,&values);CHKERRQ(ierr);
2359     for (i=xs; i<xs+nx; i++) {
2360       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2361       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2362       for (j=ys; j<ys+ny; j++) {
2363         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2364         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2365         for (k=zs; k<zs+nz; k++) {
2366           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2367           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2368 
2369           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2370 
2371           for (l=0; l<nc; l++) {
2372             cnt = 0;
2373             for (ii=istart; ii<iend+1; ii++) {
2374               for (jj=jstart; jj<jend+1; jj++) {
2375                 for (kk=kstart; kk<kend+1; kk++) {
2376                   if (ii || jj || kk) {
2377                     if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2378                       for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2379                     }
2380                   } else {
2381                     if (dfill) {
2382                       for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2383                     } else {
2384                       for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2385                     }
2386                   }
2387                 }
2388               }
2389             }
2390             row  = l + nc*(slot);
2391             ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2392           }
2393         }
2394       }
2395     }
2396     ierr = PetscFree(values);CHKERRQ(ierr);
2397     /* do not copy values to GPU since they are all zero and not yet needed there */
2398     ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
2399     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2400     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2401     ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
2402     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2403   }
2404   ierr = PetscFree(cols);CHKERRQ(ierr);
2405   PetscFunctionReturn(0);
2406 }
2407