xref: /petsc/src/dm/impls/da/fdda.c (revision 071fcb05f2a6aea8aef2c2090580530b9e9df77e)
1 
2 #include <petsc/private/dmdaimpl.h> /*I      "petscdmda.h"     I*/
3 #include <petscmat.h>
4 
5 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring*);
6 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring*);
7 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring*);
8 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring*);
9 
10 /*
11    For ghost i that may be negative or greater than the upper bound this
12   maps it into the 0:m-1 range using periodicity
13 */
14 #define SetInRange(i,m) ((i < 0) ? m+i : ((i >= m) ? i-m : i))
15 
16 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill,PetscInt w,PetscInt **rfill)
17 {
18   PetscErrorCode ierr;
19   PetscInt       i,j,nz,*fill;
20 
21   PetscFunctionBegin;
22   if (!dfill) PetscFunctionReturn(0);
23 
24   /* count number nonzeros */
25   nz = 0;
26   for (i=0; i<w; i++) {
27     for (j=0; j<w; j++) {
28       if (dfill[w*i+j]) nz++;
29     }
30   }
31   ierr = PetscMalloc1(nz + w + 1,&fill);CHKERRQ(ierr);
32   /* construct modified CSR storage of nonzero structure */
33   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
34    so fill[1] - fill[0] gives number of nonzeros in first row etc */
35   nz = w + 1;
36   for (i=0; i<w; i++) {
37     fill[i] = nz;
38     for (j=0; j<w; j++) {
39       if (dfill[w*i+j]) {
40         fill[nz] = j;
41         nz++;
42       }
43     }
44   }
45   fill[w] = nz;
46 
47   *rfill = fill;
48   PetscFunctionReturn(0);
49 }
50 
51 
52 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse,PetscInt w,PetscInt **rfill)
53 {
54   PetscErrorCode ierr;
55   PetscInt       nz;
56 
57   PetscFunctionBegin;
58   if (!dfillsparse) PetscFunctionReturn(0);
59 
60   /* Determine number of non-zeros */
61   nz = (dfillsparse[w] - w - 1);
62 
63   /* Allocate space for our copy of the given sparse matrix representation. */
64   ierr = PetscMalloc1(nz + w + 1,rfill);CHKERRQ(ierr);
65   ierr = PetscArraycpy(*rfill,dfillsparse,nz+w+1);CHKERRQ(ierr);
66   PetscFunctionReturn(0);
67 }
68 
69 
70 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
71 {
72   PetscErrorCode ierr;
73   PetscInt       i,k,cnt = 1;
74 
75   PetscFunctionBegin;
76 
77   /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of
78    columns to the left with any nonzeros in them plus 1 */
79   ierr = PetscCalloc1(dd->w,&dd->ofillcols);CHKERRQ(ierr);
80   for (i=0; i<dd->w; i++) {
81     for (k=dd->ofill[i]; k<dd->ofill[i+1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
82   }
83   for (i=0; i<dd->w; i++) {
84     if (dd->ofillcols[i]) {
85       dd->ofillcols[i] = cnt++;
86     }
87   }
88   PetscFunctionReturn(0);
89 }
90 
91 
92 
93 /*@
94     DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
95     of the matrix returned by DMCreateMatrix().
96 
97     Logically Collective on da
98 
99     Input Parameter:
100 +   da - the distributed array
101 .   dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
102 -   ofill - the fill pattern in the off-diagonal blocks
103 
104 
105     Level: developer
106 
107     Notes:
108     This only makes sense when you are doing multicomponent problems but using the
109        MPIAIJ matrix format
110 
111            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
112        representing coupling and 0 entries for missing coupling. For example
113 $             dfill[9] = {1, 0, 0,
114 $                         1, 1, 0,
115 $                         0, 1, 1}
116        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
117        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
118        diagonal block).
119 
120      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
121      can be represented in the dfill, ofill format
122 
123    Contributed by Glenn Hammond
124 
125 .seealso DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()
126 
127 @*/
128 PetscErrorCode  DMDASetBlockFills(DM da,const PetscInt *dfill,const PetscInt *ofill)
129 {
130   DM_DA          *dd = (DM_DA*)da->data;
131   PetscErrorCode ierr;
132 
133   PetscFunctionBegin;
134   /* save the given dfill and ofill information */
135   ierr = DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);CHKERRQ(ierr);
136   ierr = DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);CHKERRQ(ierr);
137 
138   /* count nonzeros in ofill columns */
139   ierr = DMDASetBlockFills_Private2(dd);CHKERRQ(ierr);
140 
141   PetscFunctionReturn(0);
142 }
143 
144 
145 /*@
146     DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
147     of the matrix returned by DMCreateMatrix(), using sparse representations
148     of fill patterns.
149 
150     Logically Collective on da
151 
152     Input Parameter:
153 +   da - the distributed array
154 .   dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block)
155 -   ofill - the sparse fill pattern in the off-diagonal blocks
156 
157 
158     Level: developer
159 
160     Notes: This only makes sense when you are doing multicomponent problems but using the
161        MPIAIJ matrix format
162 
163            The format for dfill and ofill is a sparse representation of a
164            dof-by-dof matrix with 1 entries representing coupling and 0 entries
165            for missing coupling.  The sparse representation is a 1 dimensional
166            array of length nz + dof + 1, where nz is the number of non-zeros in
167            the matrix.  The first dof entries in the array give the
168            starting array indices of each row's items in the rest of the array,
169            the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
170            and the remaining nz items give the column indices of each of
171            the 1s within the logical 2D matrix.  Each row's items within
172            the array are the column indices of the 1s within that row
173            of the 2D matrix.  PETSc developers may recognize that this is the
174            same format as that computed by the DMDASetBlockFills_Private()
175            function from a dense 2D matrix representation.
176 
177      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
178      can be represented in the dfill, ofill format
179 
180    Contributed by Philip C. Roth
181 
182 .seealso DMDASetBlockFills(), DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()
183 
184 @*/
185 PetscErrorCode  DMDASetBlockFillsSparse(DM da,const PetscInt *dfillsparse,const PetscInt *ofillsparse)
186 {
187   DM_DA          *dd = (DM_DA*)da->data;
188   PetscErrorCode ierr;
189 
190   PetscFunctionBegin;
191   /* save the given dfill and ofill information */
192   ierr = DMDASetBlockFillsSparse_Private(dfillsparse,dd->w,&dd->dfill);CHKERRQ(ierr);
193   ierr = DMDASetBlockFillsSparse_Private(ofillsparse,dd->w,&dd->ofill);CHKERRQ(ierr);
194 
195   /* count nonzeros in ofill columns */
196   ierr = DMDASetBlockFills_Private2(dd);CHKERRQ(ierr);
197 
198   PetscFunctionReturn(0);
199 }
200 
201 
202 PetscErrorCode  DMCreateColoring_DA(DM da,ISColoringType ctype,ISColoring *coloring)
203 {
204   PetscErrorCode   ierr;
205   PetscInt         dim,m,n,p,nc;
206   DMBoundaryType bx,by,bz;
207   MPI_Comm         comm;
208   PetscMPIInt      size;
209   PetscBool        isBAIJ;
210   DM_DA            *dd = (DM_DA*)da->data;
211 
212   PetscFunctionBegin;
213   /*
214                                   m
215           ------------------------------------------------------
216          |                                                     |
217          |                                                     |
218          |               ----------------------                |
219          |               |                    |                |
220       n  |           yn  |                    |                |
221          |               |                    |                |
222          |               .---------------------                |
223          |             (xs,ys)     xn                          |
224          |            .                                        |
225          |         (gxs,gys)                                   |
226          |                                                     |
227           -----------------------------------------------------
228   */
229 
230   /*
231          nc - number of components per grid point
232          col - number of colors needed in one direction for single component problem
233 
234   */
235   ierr = DMDAGetInfo(da,&dim,0,0,0,&m,&n,&p,&nc,0,&bx,&by,&bz,0);CHKERRQ(ierr);
236 
237   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
238   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
239   if (ctype == IS_COLORING_LOCAL) {
240     if (size == 1) {
241       ctype = IS_COLORING_GLOBAL;
242     } else if (dim > 1) {
243       if ((m==1 && bx == DM_BOUNDARY_PERIODIC) || (n==1 && by == DM_BOUNDARY_PERIODIC) || (p==1 && bz == DM_BOUNDARY_PERIODIC)) {
244         SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain  on the same process");
245       }
246     }
247   }
248 
249   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
250      matrices is for the blocks, not the individual matrix elements  */
251   ierr = PetscStrcmp(da->mattype,MATBAIJ,&isBAIJ);CHKERRQ(ierr);
252   if (!isBAIJ) {ierr = PetscStrcmp(da->mattype,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);}
253   if (!isBAIJ) {ierr = PetscStrcmp(da->mattype,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);}
254   if (isBAIJ) {
255     dd->w  = 1;
256     dd->xs = dd->xs/nc;
257     dd->xe = dd->xe/nc;
258     dd->Xs = dd->Xs/nc;
259     dd->Xe = dd->Xe/nc;
260   }
261 
262   /*
263      We do not provide a getcoloring function in the DMDA operations because
264    the basic DMDA does not know about matrices. We think of DMDA as being more
265    more low-level then matrices.
266   */
267   if (dim == 1) {
268     ierr = DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
269   } else if (dim == 2) {
270     ierr =  DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
271   } else if (dim == 3) {
272     ierr =  DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
273   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
274   if (isBAIJ) {
275     dd->w  = nc;
276     dd->xs = dd->xs*nc;
277     dd->xe = dd->xe*nc;
278     dd->Xs = dd->Xs*nc;
279     dd->Xe = dd->Xe*nc;
280   }
281   PetscFunctionReturn(0);
282 }
283 
284 /* ---------------------------------------------------------------------------------*/
285 
286 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
287 {
288   PetscErrorCode   ierr;
289   PetscInt         xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
290   PetscInt         ncolors;
291   MPI_Comm         comm;
292   DMBoundaryType bx,by;
293   DMDAStencilType  st;
294   ISColoringValue  *colors;
295   DM_DA            *dd = (DM_DA*)da->data;
296 
297   PetscFunctionBegin;
298   /*
299          nc - number of components per grid point
300          col - number of colors needed in one direction for single component problem
301 
302   */
303   ierr = DMDAGetInfo(da,&dim,&m,&n,0,&M,&N,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
304   col  = 2*s + 1;
305   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
306   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
307   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
308 
309   /* special case as taught to us by Paul Hovland */
310   if (st == DMDA_STENCIL_STAR && s == 1) {
311     ierr = DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
312   } else {
313 
314     if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X (%d) is divisible\n\
315                                                             by 2*stencil_width + 1 (%d)\n", m, col);
316     if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y (%d) is divisible\n\
317                                                             by 2*stencil_width + 1 (%d)\n", n, col);
318     if (ctype == IS_COLORING_GLOBAL) {
319       if (!dd->localcoloring) {
320         ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr);
321         ii   = 0;
322         for (j=ys; j<ys+ny; j++) {
323           for (i=xs; i<xs+nx; i++) {
324             for (k=0; k<nc; k++) {
325               colors[ii++] = k + nc*((i % col) + col*(j % col));
326             }
327           }
328         }
329         ncolors = nc + nc*(col-1 + col*(col-1));
330         ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
331       }
332       *coloring = dd->localcoloring;
333     } else if (ctype == IS_COLORING_LOCAL) {
334       if (!dd->ghostedcoloring) {
335         ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr);
336         ii   = 0;
337         for (j=gys; j<gys+gny; j++) {
338           for (i=gxs; i<gxs+gnx; i++) {
339             for (k=0; k<nc; k++) {
340               /* the complicated stuff is to handle periodic boundaries */
341               colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
342             }
343           }
344         }
345         ncolors = nc + nc*(col - 1 + col*(col-1));
346         ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
347         /* PetscIntView(ncolors,(PetscInt*)colors,0); */
348 
349         ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
350       }
351       *coloring = dd->ghostedcoloring;
352     } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
353   }
354   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
355   PetscFunctionReturn(0);
356 }
357 
358 /* ---------------------------------------------------------------------------------*/
359 
360 PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
361 {
362   PetscErrorCode   ierr;
363   PetscInt         xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
364   PetscInt         ncolors;
365   MPI_Comm         comm;
366   DMBoundaryType bx,by,bz;
367   DMDAStencilType  st;
368   ISColoringValue  *colors;
369   DM_DA            *dd = (DM_DA*)da->data;
370 
371   PetscFunctionBegin;
372   /*
373          nc - number of components per grid point
374          col - number of colors needed in one direction for single component problem
375 
376   */
377   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
378   col  = 2*s + 1;
379   if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
380                                                          by 2*stencil_width + 1\n");
381   if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
382                                                          by 2*stencil_width + 1\n");
383   if (bz == DM_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
384                                                          by 2*stencil_width + 1\n");
385 
386   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
387   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
388   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
389 
390   /* create the coloring */
391   if (ctype == IS_COLORING_GLOBAL) {
392     if (!dd->localcoloring) {
393       ierr = PetscMalloc1(nc*nx*ny*nz,&colors);CHKERRQ(ierr);
394       ii   = 0;
395       for (k=zs; k<zs+nz; k++) {
396         for (j=ys; j<ys+ny; j++) {
397           for (i=xs; i<xs+nx; i++) {
398             for (l=0; l<nc; l++) {
399               colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
400             }
401           }
402         }
403       }
404       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
405       ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
406     }
407     *coloring = dd->localcoloring;
408   } else if (ctype == IS_COLORING_LOCAL) {
409     if (!dd->ghostedcoloring) {
410       ierr = PetscMalloc1(nc*gnx*gny*gnz,&colors);CHKERRQ(ierr);
411       ii   = 0;
412       for (k=gzs; k<gzs+gnz; k++) {
413         for (j=gys; j<gys+gny; j++) {
414           for (i=gxs; i<gxs+gnx; i++) {
415             for (l=0; l<nc; l++) {
416               /* the complicated stuff is to handle periodic boundaries */
417               colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
418             }
419           }
420         }
421       }
422       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
423       ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
424       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
425     }
426     *coloring = dd->ghostedcoloring;
427   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
428   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
429   PetscFunctionReturn(0);
430 }
431 
432 /* ---------------------------------------------------------------------------------*/
433 
434 PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
435 {
436   PetscErrorCode   ierr;
437   PetscInt         xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
438   PetscInt         ncolors;
439   MPI_Comm         comm;
440   DMBoundaryType bx;
441   ISColoringValue  *colors;
442   DM_DA            *dd = (DM_DA*)da->data;
443 
444   PetscFunctionBegin;
445   /*
446          nc - number of components per grid point
447          col - number of colors needed in one direction for single component problem
448 
449   */
450   ierr = DMDAGetInfo(da,&dim,&m,0,0,&M,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
451   col  = 2*s + 1;
452 
453   if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points %d is divisible\n\
454                                                           by 2*stencil_width + 1 %d\n",(int)m,(int)col);
455 
456   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
457   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
458   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
459 
460   /* create the coloring */
461   if (ctype == IS_COLORING_GLOBAL) {
462     if (!dd->localcoloring) {
463       ierr = PetscMalloc1(nc*nx,&colors);CHKERRQ(ierr);
464       if (dd->ofillcols) {
465         PetscInt tc = 0;
466         for (i=0; i<nc; i++) tc += (PetscInt) (dd->ofillcols[i] > 0);
467         i1 = 0;
468         for (i=xs; i<xs+nx; i++) {
469           for (l=0; l<nc; l++) {
470             if (dd->ofillcols[l] && (i % col)) {
471               colors[i1++] =  nc - 1 + tc*((i % col) - 1) + dd->ofillcols[l];
472             } else {
473               colors[i1++] = l;
474             }
475           }
476         }
477         ncolors = nc + 2*s*tc;
478       } else {
479         i1 = 0;
480         for (i=xs; i<xs+nx; i++) {
481           for (l=0; l<nc; l++) {
482             colors[i1++] = l + nc*(i % col);
483           }
484         }
485         ncolors = nc + nc*(col-1);
486       }
487       ierr = ISColoringCreate(comm,ncolors,nc*nx,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
488     }
489     *coloring = dd->localcoloring;
490   } else if (ctype == IS_COLORING_LOCAL) {
491     if (!dd->ghostedcoloring) {
492       ierr = PetscMalloc1(nc*gnx,&colors);CHKERRQ(ierr);
493       i1   = 0;
494       for (i=gxs; i<gxs+gnx; i++) {
495         for (l=0; l<nc; l++) {
496           /* the complicated stuff is to handle periodic boundaries */
497           colors[i1++] = l + nc*(SetInRange(i,m) % col);
498         }
499       }
500       ncolors = nc + nc*(col-1);
501       ierr    = ISColoringCreate(comm,ncolors,nc*gnx,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
502       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
503     }
504     *coloring = dd->ghostedcoloring;
505   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
506   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
507   PetscFunctionReturn(0);
508 }
509 
510 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
511 {
512   PetscErrorCode   ierr;
513   PetscInt         xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
514   PetscInt         ncolors;
515   MPI_Comm         comm;
516   DMBoundaryType bx,by;
517   ISColoringValue  *colors;
518   DM_DA            *dd = (DM_DA*)da->data;
519 
520   PetscFunctionBegin;
521   /*
522          nc - number of components per grid point
523          col - number of colors needed in one direction for single component problem
524 
525   */
526   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,0);CHKERRQ(ierr);
527   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
528   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
529   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
530 
531   if (bx == DM_BOUNDARY_PERIODIC && (m % 5)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible by 5\n");
532   if (by == DM_BOUNDARY_PERIODIC && (n % 5)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible by 5\n");
533 
534   /* create the coloring */
535   if (ctype == IS_COLORING_GLOBAL) {
536     if (!dd->localcoloring) {
537       ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr);
538       ii   = 0;
539       for (j=ys; j<ys+ny; j++) {
540         for (i=xs; i<xs+nx; i++) {
541           for (k=0; k<nc; k++) {
542             colors[ii++] = k + nc*((3*j+i) % 5);
543           }
544         }
545       }
546       ncolors = 5*nc;
547       ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
548     }
549     *coloring = dd->localcoloring;
550   } else if (ctype == IS_COLORING_LOCAL) {
551     if (!dd->ghostedcoloring) {
552       ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr);
553       ii = 0;
554       for (j=gys; j<gys+gny; j++) {
555         for (i=gxs; i<gxs+gnx; i++) {
556           for (k=0; k<nc; k++) {
557             colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
558           }
559         }
560       }
561       ncolors = 5*nc;
562       ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
563       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
564     }
565     *coloring = dd->ghostedcoloring;
566   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
567   PetscFunctionReturn(0);
568 }
569 
570 /* =========================================================================== */
571 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat,PetscBool);
572 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM,Mat);
573 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat,PetscBool);
574 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat);
575 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat,PetscBool);
576 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat);
577 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat);
578 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat);
579 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat);
580 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat);
581 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM,Mat);
582 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM,Mat);
583 extern PetscErrorCode DMCreateMatrix_DA_IS(DM,Mat);
584 
585 /*@C
586    MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix
587 
588    Logically Collective on mat
589 
590    Input Parameters:
591 +  mat - the matrix
592 -  da - the da
593 
594    Level: intermediate
595 
596 @*/
597 PetscErrorCode MatSetupDM(Mat mat,DM da)
598 {
599   PetscErrorCode ierr;
600 
601   PetscFunctionBegin;
602   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
603   PetscValidHeaderSpecificType(da,DM_CLASSID,1,DMDA);
604   ierr = PetscTryMethod(mat,"MatSetupDM_C",(Mat,DM),(mat,da));CHKERRQ(ierr);
605   PetscFunctionReturn(0);
606 }
607 
608 PetscErrorCode  MatView_MPI_DA(Mat A,PetscViewer viewer)
609 {
610   DM                da;
611   PetscErrorCode    ierr;
612   const char        *prefix;
613   Mat               Anatural;
614   AO                ao;
615   PetscInt          rstart,rend,*petsc,i;
616   IS                is;
617   MPI_Comm          comm;
618   PetscViewerFormat format;
619 
620   PetscFunctionBegin;
621   /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
622   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
623   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0);
624 
625   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
626   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
627   if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
628 
629   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
630   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
631   ierr = PetscMalloc1(rend-rstart,&petsc);CHKERRQ(ierr);
632   for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
633   ierr = AOApplicationToPetsc(ao,rend-rstart,petsc);CHKERRQ(ierr);
634   ierr = ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
635 
636   /* call viewer on natural ordering */
637   ierr = MatCreateSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);CHKERRQ(ierr);
638   ierr = ISDestroy(&is);CHKERRQ(ierr);
639   ierr = PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);CHKERRQ(ierr);
640   ierr = PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);CHKERRQ(ierr);
641   ierr = PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);CHKERRQ(ierr);
642   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
643   ierr = MatView(Anatural,viewer);CHKERRQ(ierr);
644   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
645   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
646   PetscFunctionReturn(0);
647 }
648 
649 PetscErrorCode  MatLoad_MPI_DA(Mat A,PetscViewer viewer)
650 {
651   DM             da;
652   PetscErrorCode ierr;
653   Mat            Anatural,Aapp;
654   AO             ao;
655   PetscInt       rstart,rend,*app,i,m,n,M,N;
656   IS             is;
657   MPI_Comm       comm;
658 
659   PetscFunctionBegin;
660   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
661   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
662   if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
663 
664   /* Load the matrix in natural ordering */
665   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Anatural);CHKERRQ(ierr);
666   ierr = MatSetType(Anatural,((PetscObject)A)->type_name);CHKERRQ(ierr);
667   ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr);
668   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
669   ierr = MatSetSizes(Anatural,m,n,M,N);CHKERRQ(ierr);
670   ierr = MatLoad(Anatural,viewer);CHKERRQ(ierr);
671 
672   /* Map natural ordering to application ordering and create IS */
673   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
674   ierr = MatGetOwnershipRange(Anatural,&rstart,&rend);CHKERRQ(ierr);
675   ierr = PetscMalloc1(rend-rstart,&app);CHKERRQ(ierr);
676   for (i=rstart; i<rend; i++) app[i-rstart] = i;
677   ierr = AOPetscToApplication(ao,rend-rstart,app);CHKERRQ(ierr);
678   ierr = ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
679 
680   /* Do permutation and replace header */
681   ierr = MatCreateSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);CHKERRQ(ierr);
682   ierr = MatHeaderReplace(A,&Aapp);CHKERRQ(ierr);
683   ierr = ISDestroy(&is);CHKERRQ(ierr);
684   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
685   PetscFunctionReturn(0);
686 }
687 
688 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
689 {
690   PetscErrorCode ierr;
691   PetscInt       dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
692   Mat            A;
693   MPI_Comm       comm;
694   MatType        Atype;
695   PetscSection   section, sectionGlobal;
696   void           (*aij)(void)=NULL,(*baij)(void)=NULL,(*sbaij)(void)=NULL,(*sell)(void)=NULL,(*is)(void)=NULL;
697   MatType        mtype;
698   PetscMPIInt    size;
699   DM_DA          *dd = (DM_DA*)da->data;
700 
701   PetscFunctionBegin;
702   ierr = MatInitializePackage();CHKERRQ(ierr);
703   mtype = da->mattype;
704 
705   ierr = DMGetSection(da, &section);CHKERRQ(ierr);
706   if (section) {
707     PetscInt  bs = -1;
708     PetscInt  localSize;
709     PetscBool isShell, isBlock, isSeqBlock, isMPIBlock, isSymBlock, isSymSeqBlock, isSymMPIBlock, isSymmetric;
710 
711     ierr = DMGetGlobalSection(da, &sectionGlobal);CHKERRQ(ierr);
712     ierr = PetscSectionGetConstrainedStorageSize(sectionGlobal, &localSize);CHKERRQ(ierr);
713     ierr = MatCreate(PetscObjectComm((PetscObject)da),&A);CHKERRQ(ierr);
714     ierr = MatSetSizes(A,localSize,localSize,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
715     ierr = MatSetType(A,mtype);CHKERRQ(ierr);
716     ierr = PetscStrcmp(mtype,MATSHELL,&isShell);CHKERRQ(ierr);
717     ierr = PetscStrcmp(mtype,MATBAIJ,&isBlock);CHKERRQ(ierr);
718     ierr = PetscStrcmp(mtype,MATSEQBAIJ,&isSeqBlock);CHKERRQ(ierr);
719     ierr = PetscStrcmp(mtype,MATMPIBAIJ,&isMPIBlock);CHKERRQ(ierr);
720     ierr = PetscStrcmp(mtype,MATSBAIJ,&isSymBlock);CHKERRQ(ierr);
721     ierr = PetscStrcmp(mtype,MATSEQSBAIJ,&isSymSeqBlock);CHKERRQ(ierr);
722     ierr = PetscStrcmp(mtype,MATMPISBAIJ,&isSymMPIBlock);CHKERRQ(ierr);
723     /* Check for symmetric storage */
724     isSymmetric = (PetscBool) (isSymBlock || isSymSeqBlock || isSymMPIBlock);
725     if (isSymmetric) {
726       ierr = MatSetOption(*J, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE);CHKERRQ(ierr);
727     }
728     if (!isShell) {
729       PetscInt *dnz, *onz, *dnzu, *onzu, bsLocal;
730 
731       if (bs < 0) {
732         if (isBlock || isSeqBlock || isMPIBlock || isSymBlock || isSymSeqBlock || isSymMPIBlock) {
733           PetscInt pStart, pEnd, p, dof;
734 
735           ierr = PetscSectionGetChart(sectionGlobal, &pStart, &pEnd);CHKERRQ(ierr);
736           for (p = pStart; p < pEnd; ++p) {
737             ierr = PetscSectionGetDof(sectionGlobal, p, &dof);CHKERRQ(ierr);
738             if (dof) {
739               bs = dof;
740               break;
741             }
742           }
743         } else {
744           bs = 1;
745         }
746         /* Must have same blocksize on all procs (some might have no points) */
747         bsLocal = bs;
748         ierr    = MPIU_Allreduce(&bsLocal, &bs, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)da));CHKERRQ(ierr);
749       }
750       ierr = PetscCalloc4(localSize/bs, &dnz, localSize/bs, &onz, localSize/bs, &dnzu, localSize/bs, &onzu);CHKERRQ(ierr);
751       /* ierr = DMPlexPreallocateOperator(dm, bs, section, sectionGlobal, dnz, onz, dnzu, onzu, *J, fillMatrix);CHKERRQ(ierr); */
752       ierr = PetscFree4(dnz, onz, dnzu, onzu);CHKERRQ(ierr);
753     }
754   }
755   /*
756                                   m
757           ------------------------------------------------------
758          |                                                     |
759          |                                                     |
760          |               ----------------------                |
761          |               |                    |                |
762       n  |           ny  |                    |                |
763          |               |                    |                |
764          |               .---------------------                |
765          |             (xs,ys)     nx                          |
766          |            .                                        |
767          |         (gxs,gys)                                   |
768          |                                                     |
769           -----------------------------------------------------
770   */
771 
772   /*
773          nc - number of components per grid point
774          col - number of colors needed in one direction for single component problem
775 
776   */
777   M   = dd->M;
778   N   = dd->N;
779   P   = dd->P;
780   dim = da->dim;
781   dof = dd->w;
782   /* ierr = DMDAGetInfo(da,&dim,&M,&N,&P,0,0,0,&dof,0,0,0,0,0);CHKERRQ(ierr); */
783   ierr = DMDAGetCorners(da,0,0,0,&nx,&ny,&nz);CHKERRQ(ierr);
784   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
785   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
786   ierr = MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);CHKERRQ(ierr);
787   ierr = MatSetType(A,mtype);CHKERRQ(ierr);
788   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
789   ierr = MatSetDM(A,da);CHKERRQ(ierr);
790   if (da->structure_only) {
791     ierr = MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE);CHKERRQ(ierr);
792   }
793   ierr = MatGetType(A,&Atype);CHKERRQ(ierr);
794   /*
795      We do not provide a getmatrix function in the DMDA operations because
796    the basic DMDA does not know about matrices. We think of DMDA as being more
797    more low-level than matrices. This is kind of cheating but, cause sometimes
798    we think of DMDA has higher level than matrices.
799 
800      We could switch based on Atype (or mtype), but we do not since the
801    specialized setting routines depend only on the particular preallocation
802    details of the matrix, not the type itself.
803   */
804   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
805   if (!aij) {
806     ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
807   }
808   if (!aij) {
809     ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
810     if (!baij) {
811       ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
812     }
813     if (!baij) {
814       ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
815       if (!sbaij) {
816         ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
817       }
818       if (!sbaij) {
819         ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISELLSetPreallocation_C",&sell);CHKERRQ(ierr);
820         if (!sell) {
821           ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",&sell);CHKERRQ(ierr);
822         }
823       }
824       if (!sell) {
825         ierr = PetscObjectQueryFunction((PetscObject)A,"MatISSetPreallocation_C",&is);CHKERRQ(ierr);
826       }
827     }
828   }
829   if (aij) {
830     if (dim == 1) {
831       if (dd->ofill) {
832         ierr = DMCreateMatrix_DA_1d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
833       } else {
834         ierr = DMCreateMatrix_DA_1d_MPIAIJ(da,A,PETSC_FALSE);CHKERRQ(ierr);
835       }
836     } else if (dim == 2) {
837       if (dd->ofill) {
838         ierr = DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
839       } else {
840         ierr = DMCreateMatrix_DA_2d_MPIAIJ(da,A,PETSC_FALSE);CHKERRQ(ierr);
841       }
842     } else if (dim == 3) {
843       if (dd->ofill) {
844         ierr = DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
845       } else {
846         ierr = DMCreateMatrix_DA_3d_MPIAIJ(da,A,PETSC_FALSE);CHKERRQ(ierr);
847       }
848     }
849   } else if (baij) {
850     if (dim == 2) {
851       ierr = DMCreateMatrix_DA_2d_MPIBAIJ(da,A);CHKERRQ(ierr);
852     } else if (dim == 3) {
853       ierr = DMCreateMatrix_DA_3d_MPIBAIJ(da,A);CHKERRQ(ierr);
854     } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
855   } else if (sbaij) {
856     if (dim == 2) {
857       ierr = DMCreateMatrix_DA_2d_MPISBAIJ(da,A);CHKERRQ(ierr);
858     } else if (dim == 3) {
859       ierr = DMCreateMatrix_DA_3d_MPISBAIJ(da,A);CHKERRQ(ierr);
860     } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
861   } else if (sell) {
862      if (dim == 2) {
863        ierr = DMCreateMatrix_DA_2d_MPISELL(da,A);CHKERRQ(ierr);
864      } else if (dim == 3) {
865        ierr = DMCreateMatrix_DA_3d_MPISELL(da,A);CHKERRQ(ierr);
866      } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
867   } else if (is) {
868     ierr = DMCreateMatrix_DA_IS(da,A);CHKERRQ(ierr);
869   } else {
870     ISLocalToGlobalMapping ltog;
871 
872     ierr = MatSetBlockSize(A,dof);CHKERRQ(ierr);
873     ierr = MatSetUp(A);CHKERRQ(ierr);
874     ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
875     ierr = MatSetLocalToGlobalMapping(A,ltog,ltog);CHKERRQ(ierr);
876   }
877   ierr = DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);CHKERRQ(ierr);
878   ierr = MatSetStencil(A,dim,dims,starts,dof);CHKERRQ(ierr);
879   ierr = MatSetDM(A,da);CHKERRQ(ierr);
880   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
881   if (size > 1) {
882     /* change viewer to display matrix in natural ordering */
883     ierr = MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA);CHKERRQ(ierr);
884     ierr = MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA);CHKERRQ(ierr);
885   }
886   *J = A;
887   PetscFunctionReturn(0);
888 }
889 
890 /* ---------------------------------------------------------------------------------*/
891 PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
892 
893 PetscErrorCode DMCreateMatrix_DA_IS(DM dm,Mat J)
894 {
895   DM_DA                  *da = (DM_DA*)dm->data;
896   Mat                    lJ;
897   ISLocalToGlobalMapping ltog;
898   IS                     is_loc_filt, is_glob;
899   const PetscInt         *e_loc,*idx;
900   PetscInt               nel,nen,nv,dof,dim,*gidx,nb;
901   PetscBool              flg;
902   PetscErrorCode         ierr;
903 
904   /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
905      We need to filter the local indices that are represented through the DMDAGetElements decomposition
906      This is because the size of the local matrices in MATIS is the local size of the l2g map */
907   PetscFunctionBegin;
908   dof  = da->w;
909   dim  = dm->dim;
910 
911   ierr = MatSetBlockSize(J,dof);CHKERRQ(ierr);
912 
913   /* get local elements indices in local DMDA numbering */
914   ierr = DMDAGetElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
915   ierr = ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nel*nen,e_loc,PETSC_COPY_VALUES,&is_loc_filt);CHKERRQ(ierr);
916   ierr = DMDARestoreElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr);
917 
918   /* obtain a consistent local ordering for MATIS */
919   ierr = ISSortRemoveDups(is_loc_filt);CHKERRQ(ierr);
920   ierr = ISBlockGetLocalSize(is_loc_filt,&nb);CHKERRQ(ierr);
921   ierr = DMGetLocalToGlobalMapping(dm,&ltog);CHKERRQ(ierr);
922   ierr = ISLocalToGlobalMappingGetSize(ltog,&nv);CHKERRQ(ierr);
923   ierr = PetscMalloc1(PetscMax(nb,nv/dof),&gidx);CHKERRQ(ierr);
924   ierr = ISBlockGetIndices(is_loc_filt,&idx);CHKERRQ(ierr);
925   ierr = ISLocalToGlobalMappingApplyBlock(ltog,nb,idx,gidx);CHKERRQ(ierr);
926   ierr = ISBlockRestoreIndices(is_loc_filt,&idx);CHKERRQ(ierr);
927   ierr = ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nb,gidx,PETSC_USE_POINTER,&is_glob);CHKERRQ(ierr);
928   ierr = ISLocalToGlobalMappingCreateIS(is_glob,&ltog);CHKERRQ(ierr);
929   ierr = ISDestroy(&is_glob);CHKERRQ(ierr);
930   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
931   ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
932 
933   /* We also attach a l2g map to the local matrices to have MatSetValueLocal to work */
934   ierr = MatISGetLocalMat(J,&lJ);CHKERRQ(ierr);
935   ierr = ISLocalToGlobalMappingCreateIS(is_loc_filt,&ltog);CHKERRQ(ierr);
936   ierr = ISDestroy(&is_loc_filt);CHKERRQ(ierr);
937   ierr = ISCreateStride(PetscObjectComm((PetscObject)lJ),nv/dof,0,1,&is_glob);CHKERRQ(ierr);
938   ierr = ISGetIndices(is_glob,&idx);CHKERRQ(ierr);
939   ierr = ISGlobalToLocalMappingApplyBlock(ltog,IS_GTOLM_MASK,nv/dof,idx,&nb,gidx);CHKERRQ(ierr);
940   ierr = ISRestoreIndices(is_glob,&idx);CHKERRQ(ierr);
941   ierr = ISDestroy(&is_glob);CHKERRQ(ierr);
942   ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
943   ierr = ISCreateBlock(PETSC_COMM_SELF,dof,nb,gidx,PETSC_USE_POINTER,&is_loc_filt);CHKERRQ(ierr);
944   ierr = ISLocalToGlobalMappingCreateIS(is_loc_filt,&ltog);CHKERRQ(ierr);
945   ierr = ISDestroy(&is_loc_filt);CHKERRQ(ierr);
946   ierr = MatSetLocalToGlobalMapping(lJ,ltog,ltog);CHKERRQ(ierr);
947   ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
948   ierr = PetscFree(gidx);CHKERRQ(ierr);
949 
950   /* Preallocation (not exact): we reuse the preallocation routines of the assembled version  */
951   flg = dm->prealloc_only;
952   dm->prealloc_only = PETSC_TRUE;
953   switch (dim) {
954   case 1:
955     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr);
956     ierr = DMCreateMatrix_DA_1d_MPIAIJ(dm,J,PETSC_TRUE);CHKERRQ(ierr);
957     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
958     break;
959   case 2:
960     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr);
961     ierr = DMCreateMatrix_DA_2d_MPIAIJ(dm,J,PETSC_TRUE);CHKERRQ(ierr);
962     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
963     break;
964   case 3:
965     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr);
966     ierr = DMCreateMatrix_DA_3d_MPIAIJ(dm,J,PETSC_TRUE);CHKERRQ(ierr);
967     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
968     break;
969   default:
970     SETERRQ1(PetscObjectComm((PetscObject)dm),PETSC_ERR_SUP,"Unhandled dimension %d",dim);
971     break;
972   }
973   dm->prealloc_only = flg;
974   PetscFunctionReturn(0);
975 }
976 
977 PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da,Mat J)
978 {
979   PetscErrorCode         ierr;
980   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p;
981   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
982   MPI_Comm               comm;
983   PetscScalar            *values;
984   DMBoundaryType         bx,by;
985   ISLocalToGlobalMapping ltog;
986   DMDAStencilType        st;
987 
988   PetscFunctionBegin;
989   /*
990          nc - number of components per grid point
991          col - number of colors needed in one direction for single component problem
992 
993   */
994   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
995   col  = 2*s + 1;
996   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
997   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
998   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
999 
1000   ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr);
1001   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1002 
1003   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1004   /* determine the matrix preallocation information */
1005   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
1006   for (i=xs; i<xs+nx; i++) {
1007 
1008     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1009     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1010 
1011     for (j=ys; j<ys+ny; j++) {
1012       slot = i - gxs + gnx*(j - gys);
1013 
1014       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1015       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1016 
1017       cnt = 0;
1018       for (k=0; k<nc; k++) {
1019         for (l=lstart; l<lend+1; l++) {
1020           for (p=pstart; p<pend+1; p++) {
1021             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1022               cols[cnt++] = k + nc*(slot + gnx*l + p);
1023             }
1024           }
1025         }
1026         rows[k] = k + nc*(slot);
1027       }
1028       ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1029     }
1030   }
1031   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1032   ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1033   ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1034   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1035 
1036   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1037 
1038   /*
1039     For each node in the grid: we get the neighbors in the local (on processor ordering
1040     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1041     PETSc ordering.
1042   */
1043   if (!da->prealloc_only) {
1044     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
1045     for (i=xs; i<xs+nx; i++) {
1046 
1047       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1048       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1049 
1050       for (j=ys; j<ys+ny; j++) {
1051         slot = i - gxs + gnx*(j - gys);
1052 
1053         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1054         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1055 
1056         cnt = 0;
1057         for (k=0; k<nc; k++) {
1058           for (l=lstart; l<lend+1; l++) {
1059             for (p=pstart; p<pend+1; p++) {
1060               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1061                 cols[cnt++] = k + nc*(slot + gnx*l + p);
1062               }
1063             }
1064           }
1065           rows[k] = k + nc*(slot);
1066         }
1067         ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1068       }
1069     }
1070     ierr = PetscFree(values);CHKERRQ(ierr);
1071     /* do not copy values to GPU since they are all zero and not yet needed there */
1072     ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1073     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1074     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1075     ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1076     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1077   }
1078   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1079   PetscFunctionReturn(0);
1080 }
1081 
1082 PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da,Mat J)
1083 {
1084   PetscErrorCode         ierr;
1085   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1086   PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
1087   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
1088   MPI_Comm               comm;
1089   PetscScalar            *values;
1090   DMBoundaryType         bx,by,bz;
1091   ISLocalToGlobalMapping ltog;
1092   DMDAStencilType        st;
1093 
1094   PetscFunctionBegin;
1095   /*
1096          nc - number of components per grid point
1097          col - number of colors needed in one direction for single component problem
1098 
1099   */
1100   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1101   col  = 2*s + 1;
1102   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1103   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1104   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1105 
1106   ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr);
1107   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1108 
1109   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1110   /* determine the matrix preallocation information */
1111   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1112   for (i=xs; i<xs+nx; i++) {
1113     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1114     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1115     for (j=ys; j<ys+ny; j++) {
1116       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1117       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1118       for (k=zs; k<zs+nz; k++) {
1119         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1120         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1121 
1122         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1123 
1124         cnt = 0;
1125         for (l=0; l<nc; l++) {
1126           for (ii=istart; ii<iend+1; ii++) {
1127             for (jj=jstart; jj<jend+1; jj++) {
1128               for (kk=kstart; kk<kend+1; kk++) {
1129                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1130                   cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1131                 }
1132               }
1133             }
1134           }
1135           rows[l] = l + nc*(slot);
1136         }
1137         ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1138       }
1139     }
1140   }
1141   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1142   ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1143   ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1144   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1145   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1146 
1147   /*
1148     For each node in the grid: we get the neighbors in the local (on processor ordering
1149     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1150     PETSc ordering.
1151   */
1152   if (!da->prealloc_only) {
1153     ierr = PetscCalloc1(col*col*col*nc*nc*nc,&values);CHKERRQ(ierr);
1154     for (i=xs; i<xs+nx; i++) {
1155       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1156       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1157       for (j=ys; j<ys+ny; j++) {
1158         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1159         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1160         for (k=zs; k<zs+nz; k++) {
1161           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1162           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1163 
1164           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1165 
1166           cnt = 0;
1167           for (l=0; l<nc; l++) {
1168             for (ii=istart; ii<iend+1; ii++) {
1169               for (jj=jstart; jj<jend+1; jj++) {
1170                 for (kk=kstart; kk<kend+1; kk++) {
1171                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1172                     cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1173                   }
1174                 }
1175               }
1176             }
1177             rows[l] = l + nc*(slot);
1178           }
1179           ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1180         }
1181       }
1182     }
1183     ierr = PetscFree(values);CHKERRQ(ierr);
1184     /* do not copy values to GPU since they are all zero and not yet needed there */
1185     ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1186     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1187     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1188     ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1189     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1190   }
1191   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1192   PetscFunctionReturn(0);
1193 }
1194 
1195 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J,PetscBool isIS)
1196 {
1197   PetscErrorCode         ierr;
1198   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,M,N;
1199   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
1200   MPI_Comm               comm;
1201   DMBoundaryType         bx,by;
1202   ISLocalToGlobalMapping ltog,mltog;
1203   DMDAStencilType        st;
1204   PetscBool              removedups = PETSC_FALSE;
1205 
1206   PetscFunctionBegin;
1207   /*
1208          nc - number of components per grid point
1209          col - number of colors needed in one direction for single component problem
1210 
1211   */
1212   ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1213   if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) {
1214     ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr);
1215   }
1216   col  = 2*s + 1;
1217   /*
1218        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1219        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1220   */
1221   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1222   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1223   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1224   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1225   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1226 
1227   ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr);
1228   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1229 
1230   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1231   /* determine the matrix preallocation information */
1232   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
1233   for (i=xs; i<xs+nx; i++) {
1234 
1235     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1236     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1237 
1238     for (j=ys; j<ys+ny; j++) {
1239       slot = i - gxs + gnx*(j - gys);
1240 
1241       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1242       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1243 
1244       cnt = 0;
1245       for (k=0; k<nc; k++) {
1246         for (l=lstart; l<lend+1; l++) {
1247           for (p=pstart; p<pend+1; p++) {
1248             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1249               cols[cnt++] = k + nc*(slot + gnx*l + p);
1250             }
1251           }
1252         }
1253         rows[k] = k + nc*(slot);
1254       }
1255       if (removedups) {
1256         ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1257       } else {
1258         ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1259       }
1260     }
1261   }
1262   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1263   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1264   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1265   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1266   ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
1267   if (!mltog) {
1268     ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1269   }
1270 
1271   /*
1272     For each node in the grid: we get the neighbors in the local (on processor ordering
1273     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1274     PETSc ordering.
1275   */
1276   if (!da->prealloc_only) {
1277     for (i=xs; i<xs+nx; i++) {
1278 
1279       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1280       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1281 
1282       for (j=ys; j<ys+ny; j++) {
1283         slot = i - gxs + gnx*(j - gys);
1284 
1285         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1286         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1287 
1288         cnt = 0;
1289         for (l=lstart; l<lend+1; l++) {
1290           for (p=pstart; p<pend+1; p++) {
1291             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1292               cols[cnt++] = nc*(slot + gnx*l + p);
1293               for (k=1; k<nc; k++) {
1294                 cols[cnt] = 1 + cols[cnt-1];cnt++;
1295               }
1296             }
1297           }
1298         }
1299         for (k=0; k<nc; k++) rows[k] = k + nc*(slot);
1300         ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1301       }
1302     }
1303     /* do not copy values to GPU since they are all zero and not yet needed there */
1304     ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1305     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1306     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1307     ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1308     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1309     if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) {
1310       ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
1311     }
1312   }
1313   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1314   PetscFunctionReturn(0);
1315 }
1316 
1317 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)
1318 {
1319   PetscErrorCode         ierr;
1320   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1321   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,maxcnt = 0,l,p,M,N;
1322   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
1323   DM_DA                  *dd = (DM_DA*)da->data;
1324   PetscInt               ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
1325   MPI_Comm               comm;
1326   DMBoundaryType         bx,by;
1327   ISLocalToGlobalMapping ltog;
1328   DMDAStencilType        st;
1329   PetscBool              removedups = PETSC_FALSE;
1330 
1331   PetscFunctionBegin;
1332   /*
1333          nc - number of components per grid point
1334          col - number of colors needed in one direction for single component problem
1335 
1336   */
1337   ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1338   col  = 2*s + 1;
1339   /*
1340        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1341        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1342   */
1343   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1344   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1345   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1346   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1347   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1348 
1349   ierr = PetscMalloc1(col*col*nc,&cols);CHKERRQ(ierr);
1350   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1351 
1352   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1353   /* determine the matrix preallocation information */
1354   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
1355   for (i=xs; i<xs+nx; i++) {
1356 
1357     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1358     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1359 
1360     for (j=ys; j<ys+ny; j++) {
1361       slot = i - gxs + gnx*(j - gys);
1362 
1363       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1364       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1365 
1366       for (k=0; k<nc; k++) {
1367         cnt = 0;
1368         for (l=lstart; l<lend+1; l++) {
1369           for (p=pstart; p<pend+1; p++) {
1370             if (l || p) {
1371               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
1372                 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
1373               }
1374             } else {
1375               if (dfill) {
1376                 for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1377               } else {
1378                 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1379               }
1380             }
1381           }
1382         }
1383         row    = k + nc*(slot);
1384         maxcnt = PetscMax(maxcnt,cnt);
1385         if (removedups) {
1386           ierr   = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1387         } else {
1388           ierr   = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1389         }
1390       }
1391     }
1392   }
1393   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1394   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1395   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1396   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1397 
1398   /*
1399     For each node in the grid: we get the neighbors in the local (on processor ordering
1400     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1401     PETSc ordering.
1402   */
1403   if (!da->prealloc_only) {
1404     for (i=xs; i<xs+nx; i++) {
1405 
1406       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1407       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1408 
1409       for (j=ys; j<ys+ny; j++) {
1410         slot = i - gxs + gnx*(j - gys);
1411 
1412         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1413         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1414 
1415         for (k=0; k<nc; k++) {
1416           cnt = 0;
1417           for (l=lstart; l<lend+1; l++) {
1418             for (p=pstart; p<pend+1; p++) {
1419               if (l || p) {
1420                 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
1421                   for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
1422                 }
1423               } else {
1424                 if (dfill) {
1425                   for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1426                 } else {
1427                   for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1428                 }
1429               }
1430             }
1431           }
1432           row  = k + nc*(slot);
1433           ierr = MatSetValuesLocal(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1434         }
1435       }
1436     }
1437     /* do not copy values to GPU since they are all zero and not yet needed there */
1438     ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1439     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1440     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1441     ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1442     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1443   }
1444   ierr = PetscFree(cols);CHKERRQ(ierr);
1445   PetscFunctionReturn(0);
1446 }
1447 
1448 /* ---------------------------------------------------------------------------------*/
1449 
1450 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J,PetscBool isIS)
1451 {
1452   PetscErrorCode         ierr;
1453   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1454   PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
1455   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
1456   MPI_Comm               comm;
1457   DMBoundaryType         bx,by,bz;
1458   ISLocalToGlobalMapping ltog,mltog;
1459   DMDAStencilType        st;
1460   PetscBool              removedups = PETSC_FALSE;
1461 
1462   PetscFunctionBegin;
1463   /*
1464          nc - number of components per grid point
1465          col - number of colors needed in one direction for single component problem
1466 
1467   */
1468   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1469   if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) {
1470     ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr);
1471   }
1472   col  = 2*s + 1;
1473 
1474   /*
1475        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1476        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1477   */
1478   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1479   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1480   if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;
1481 
1482   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1483   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1484   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1485 
1486   ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr);
1487   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1488 
1489   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1490   /* determine the matrix preallocation information */
1491   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1492   for (i=xs; i<xs+nx; i++) {
1493     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1494     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1495     for (j=ys; j<ys+ny; j++) {
1496       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1497       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1498       for (k=zs; k<zs+nz; k++) {
1499         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1500         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1501 
1502         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1503 
1504         cnt = 0;
1505         for (l=0; l<nc; l++) {
1506           for (ii=istart; ii<iend+1; ii++) {
1507             for (jj=jstart; jj<jend+1; jj++) {
1508               for (kk=kstart; kk<kend+1; kk++) {
1509                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1510                   cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1511                 }
1512               }
1513             }
1514           }
1515           rows[l] = l + nc*(slot);
1516         }
1517         if (removedups) {
1518           ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1519         } else {
1520           ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1521         }
1522       }
1523     }
1524   }
1525   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1526   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1527   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1528   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1529   ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
1530   if (!mltog) {
1531     ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1532   }
1533 
1534   /*
1535     For each node in the grid: we get the neighbors in the local (on processor ordering
1536     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1537     PETSc ordering.
1538   */
1539   if (!da->prealloc_only) {
1540     for (i=xs; i<xs+nx; i++) {
1541       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1542       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1543       for (j=ys; j<ys+ny; j++) {
1544         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1545         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1546         for (k=zs; k<zs+nz; k++) {
1547           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1548           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1549 
1550           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1551 
1552           cnt = 0;
1553           for (kk=kstart; kk<kend+1; kk++) {
1554             for (jj=jstart; jj<jend+1; jj++) {
1555               for (ii=istart; ii<iend+1; ii++) {
1556                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1557                   cols[cnt++] = nc*(slot + ii + gnx*jj + gnx*gny*kk);
1558                     for (l=1; l<nc; l++) {
1559                       cols[cnt] = 1 + cols[cnt-1];cnt++;
1560                   }
1561                 }
1562               }
1563             }
1564           }
1565           rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
1566           ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1567         }
1568       }
1569     }
1570     /* do not copy values to GPU since they are all zero and not yet needed there */
1571     ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1572     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1573     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1574     if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) {
1575       ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
1576     }
1577     ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1578     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1579   }
1580   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1581   PetscFunctionReturn(0);
1582 }
1583 
1584 /* ---------------------------------------------------------------------------------*/
1585 
1586 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J)
1587 {
1588   PetscErrorCode         ierr;
1589   DM_DA                  *dd = (DM_DA*)da->data;
1590   PetscInt               xs,nx,i,j,gxs,gnx,row,k,l;
1591   PetscInt               m,dim,s,*cols = NULL,nc,cnt,maxcnt = 0,*ocols;
1592   PetscInt               *ofill = dd->ofill,*dfill = dd->dfill;
1593   DMBoundaryType         bx;
1594   ISLocalToGlobalMapping ltog;
1595   PetscMPIInt            rank,size;
1596 
1597   PetscFunctionBegin;
1598   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)da),&rank);CHKERRQ(ierr);
1599   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRQ(ierr);
1600 
1601   /*
1602          nc - number of components per grid point
1603 
1604   */
1605   ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
1606   if (s > 1) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Matrix creation for 1d not implemented correctly for stencil width larger than 1");
1607   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
1608   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
1609 
1610   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1611   ierr = PetscCalloc2(nx*nc,&cols,nx*nc,&ocols);CHKERRQ(ierr);
1612 
1613   /*
1614         note should be smaller for first and last process with no periodic
1615         does not handle dfill
1616   */
1617   cnt = 0;
1618   /* coupling with process to the left */
1619   for (i=0; i<s; i++) {
1620     for (j=0; j<nc; j++) {
1621       ocols[cnt] = ((!rank) ? 0 : (s - i)*(ofill[j+1] - ofill[j]));
1622       cols[cnt]  = dfill[j+1] - dfill[j] + (s + i)*(ofill[j+1] - ofill[j]);
1623       if (!rank && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1624         if (size > 1) ocols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
1625         else cols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
1626       }
1627       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1628       cnt++;
1629     }
1630   }
1631   for (i=s; i<nx-s; i++) {
1632     for (j=0; j<nc; j++) {
1633       cols[cnt] = dfill[j+1] - dfill[j] + 2*s*(ofill[j+1] - ofill[j]);
1634       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1635       cnt++;
1636     }
1637   }
1638   /* coupling with process to the right */
1639   for (i=nx-s; i<nx; i++) {
1640     for (j=0; j<nc; j++) {
1641       ocols[cnt] = ((rank == (size-1)) ? 0 : (i - nx + s + 1)*(ofill[j+1] - ofill[j]));
1642       cols[cnt]  = dfill[j+1] - dfill[j] + (s + nx - i - 1)*(ofill[j+1] - ofill[j]);
1643       if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1644         if (size > 1) ocols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
1645         else cols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
1646       }
1647       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1648       cnt++;
1649     }
1650   }
1651 
1652   ierr = MatSeqAIJSetPreallocation(J,0,cols);CHKERRQ(ierr);
1653   ierr = MatMPIAIJSetPreallocation(J,0,cols,0,ocols);CHKERRQ(ierr);
1654   ierr = PetscFree2(cols,ocols);CHKERRQ(ierr);
1655 
1656   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1657   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1658 
1659   /*
1660     For each node in the grid: we get the neighbors in the local (on processor ordering
1661     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1662     PETSc ordering.
1663   */
1664   if (!da->prealloc_only) {
1665     ierr = PetscMalloc1(maxcnt,&cols);CHKERRQ(ierr);
1666     row = xs*nc;
1667     /* coupling with process to the left */
1668     for (i=xs; i<xs+s; i++) {
1669       for (j=0; j<nc; j++) {
1670         cnt = 0;
1671         if (rank) {
1672           for (l=0; l<s; l++) {
1673             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1674           }
1675         }
1676         if (!rank && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1677           for (l=0; l<s; l++) {
1678             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (m + i - s - l)*nc + ofill[k];
1679           }
1680         }
1681         if (dfill) {
1682           for (k=dfill[j]; k<dfill[j+1]; k++) {
1683             cols[cnt++] = i*nc + dfill[k];
1684           }
1685         } else {
1686           for (k=0; k<nc; k++) {
1687             cols[cnt++] = i*nc + k;
1688           }
1689         }
1690         for (l=0; l<s; l++) {
1691           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1692         }
1693         ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1694         row++;
1695       }
1696     }
1697     for (i=xs+s; i<xs+nx-s; i++) {
1698       for (j=0; j<nc; j++) {
1699         cnt = 0;
1700         for (l=0; l<s; l++) {
1701           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1702         }
1703         if (dfill) {
1704           for (k=dfill[j]; k<dfill[j+1]; k++) {
1705             cols[cnt++] = i*nc + dfill[k];
1706           }
1707         } else {
1708           for (k=0; k<nc; k++) {
1709             cols[cnt++] = i*nc + k;
1710           }
1711         }
1712         for (l=0; l<s; l++) {
1713           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1714         }
1715         ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1716         row++;
1717       }
1718     }
1719     /* coupling with process to the right */
1720     for (i=xs+nx-s; i<xs+nx; i++) {
1721       for (j=0; j<nc; j++) {
1722         cnt = 0;
1723         for (l=0; l<s; l++) {
1724           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1725         }
1726         if (dfill) {
1727           for (k=dfill[j]; k<dfill[j+1]; k++) {
1728             cols[cnt++] = i*nc + dfill[k];
1729           }
1730         } else {
1731           for (k=0; k<nc; k++) {
1732             cols[cnt++] = i*nc + k;
1733           }
1734         }
1735         if (rank < size-1) {
1736           for (l=0; l<s; l++) {
1737             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1738           }
1739         }
1740         if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1741           for (l=0; l<s; l++) {
1742             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s - l - m + 2)*nc + ofill[k];
1743           }
1744         }
1745         ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1746         row++;
1747       }
1748     }
1749     ierr = PetscFree(cols);CHKERRQ(ierr);
1750     /* do not copy values to GPU since they are all zero and not yet needed there */
1751     ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1752     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1753     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1754     ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1755     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1756   }
1757   PetscFunctionReturn(0);
1758 }
1759 
1760 /* ---------------------------------------------------------------------------------*/
1761 
1762 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J,PetscBool isIS)
1763 {
1764   PetscErrorCode         ierr;
1765   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1766   PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
1767   PetscInt               istart,iend;
1768   DMBoundaryType         bx;
1769   ISLocalToGlobalMapping ltog,mltog;
1770 
1771   PetscFunctionBegin;
1772   /*
1773          nc - number of components per grid point
1774          col - number of colors needed in one direction for single component problem
1775 
1776   */
1777   ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
1778   if (!isIS && bx == DM_BOUNDARY_NONE) {
1779     ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr);
1780   }
1781   col  = 2*s + 1;
1782 
1783   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
1784   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
1785 
1786   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1787   ierr = MatSeqAIJSetPreallocation(J,col*nc,0);CHKERRQ(ierr);
1788   ierr = MatMPIAIJSetPreallocation(J,col*nc,0,col*nc,0);CHKERRQ(ierr);
1789 
1790   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1791   ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
1792   if (!mltog) {
1793     ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1794   }
1795 
1796   /*
1797     For each node in the grid: we get the neighbors in the local (on processor ordering
1798     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1799     PETSc ordering.
1800   */
1801   if (!da->prealloc_only) {
1802     ierr = PetscMalloc2(nc,&rows,col*nc*nc,&cols);CHKERRQ(ierr);
1803     for (i=xs; i<xs+nx; i++) {
1804       istart = PetscMax(-s,gxs - i);
1805       iend   = PetscMin(s,gxs + gnx - i - 1);
1806       slot   = i - gxs;
1807 
1808       cnt = 0;
1809       for (i1=istart; i1<iend+1; i1++) {
1810         cols[cnt++] = nc*(slot + i1);
1811         for (l=1; l<nc; l++) {
1812           cols[cnt] = 1 + cols[cnt-1];cnt++;
1813         }
1814       }
1815       rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
1816       ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1817     }
1818     /* do not copy values to GPU since they are all zero and not yet needed there */
1819     ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1820     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1821     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1822     if (!isIS && bx == DM_BOUNDARY_NONE) {
1823       ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
1824     }
1825     ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1826     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1827     ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1828   }
1829   PetscFunctionReturn(0);
1830 }
1831 
1832 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
1833 {
1834   PetscErrorCode         ierr;
1835   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1836   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1837   PetscInt               istart,iend,jstart,jend,ii,jj;
1838   MPI_Comm               comm;
1839   PetscScalar            *values;
1840   DMBoundaryType         bx,by;
1841   DMDAStencilType        st;
1842   ISLocalToGlobalMapping ltog;
1843 
1844   PetscFunctionBegin;
1845   /*
1846      nc - number of components per grid point
1847      col - number of colors needed in one direction for single component problem
1848   */
1849   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1850   col  = 2*s + 1;
1851 
1852   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1853   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1854   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1855 
1856   ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr);
1857 
1858   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1859 
1860   /* determine the matrix preallocation information */
1861   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1862   for (i=xs; i<xs+nx; i++) {
1863     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1864     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1865     for (j=ys; j<ys+ny; j++) {
1866       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1867       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1868       slot   = i - gxs + gnx*(j - gys);
1869 
1870       /* Find block columns in block row */
1871       cnt = 0;
1872       for (ii=istart; ii<iend+1; ii++) {
1873         for (jj=jstart; jj<jend+1; jj++) {
1874           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1875             cols[cnt++] = slot + ii + gnx*jj;
1876           }
1877         }
1878       }
1879       ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1880     }
1881   }
1882   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1883   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1884   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1885 
1886   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1887 
1888   /*
1889     For each node in the grid: we get the neighbors in the local (on processor ordering
1890     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1891     PETSc ordering.
1892   */
1893   if (!da->prealloc_only) {
1894     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
1895     for (i=xs; i<xs+nx; i++) {
1896       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1897       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1898       for (j=ys; j<ys+ny; j++) {
1899         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1900         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1901         slot = i - gxs + gnx*(j - gys);
1902         cnt  = 0;
1903         for (ii=istart; ii<iend+1; ii++) {
1904           for (jj=jstart; jj<jend+1; jj++) {
1905             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1906               cols[cnt++] = slot + ii + gnx*jj;
1907             }
1908           }
1909         }
1910         ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1911       }
1912     }
1913     ierr = PetscFree(values);CHKERRQ(ierr);
1914     /* do not copy values to GPU since they are all zero and not yet needed there */
1915     ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1916     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1917     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1918     ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1919     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1920   }
1921   ierr = PetscFree(cols);CHKERRQ(ierr);
1922   PetscFunctionReturn(0);
1923 }
1924 
1925 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
1926 {
1927   PetscErrorCode         ierr;
1928   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1929   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1930   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1931   MPI_Comm               comm;
1932   PetscScalar            *values;
1933   DMBoundaryType         bx,by,bz;
1934   DMDAStencilType        st;
1935   ISLocalToGlobalMapping ltog;
1936 
1937   PetscFunctionBegin;
1938   /*
1939          nc - number of components per grid point
1940          col - number of colors needed in one direction for single component problem
1941 
1942   */
1943   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1944   col  = 2*s + 1;
1945 
1946   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1947   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1948   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1949 
1950   ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr);
1951 
1952   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1953 
1954   /* determine the matrix preallocation information */
1955   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1956   for (i=xs; i<xs+nx; i++) {
1957     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1958     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1959     for (j=ys; j<ys+ny; j++) {
1960       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1961       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1962       for (k=zs; k<zs+nz; k++) {
1963         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1964         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1965 
1966         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1967 
1968         /* Find block columns in block row */
1969         cnt = 0;
1970         for (ii=istart; ii<iend+1; ii++) {
1971           for (jj=jstart; jj<jend+1; jj++) {
1972             for (kk=kstart; kk<kend+1; kk++) {
1973               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1974                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1975               }
1976             }
1977           }
1978         }
1979         ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1980       }
1981     }
1982   }
1983   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1984   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1985   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1986 
1987   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1988 
1989   /*
1990     For each node in the grid: we get the neighbors in the local (on processor ordering
1991     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1992     PETSc ordering.
1993   */
1994   if (!da->prealloc_only) {
1995     ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr);
1996     for (i=xs; i<xs+nx; i++) {
1997       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1998       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1999       for (j=ys; j<ys+ny; j++) {
2000         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2001         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2002         for (k=zs; k<zs+nz; k++) {
2003           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2004           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2005 
2006           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2007 
2008           cnt = 0;
2009           for (ii=istart; ii<iend+1; ii++) {
2010             for (jj=jstart; jj<jend+1; jj++) {
2011               for (kk=kstart; kk<kend+1; kk++) {
2012                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2013                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
2014                 }
2015               }
2016             }
2017           }
2018           ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2019         }
2020       }
2021     }
2022     ierr = PetscFree(values);CHKERRQ(ierr);
2023     /* do not copy values to GPU since they are all zero and not yet needed there */
2024     ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
2025     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2026     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2027     ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
2028     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2029   }
2030   ierr = PetscFree(cols);CHKERRQ(ierr);
2031   PetscFunctionReturn(0);
2032 }
2033 
2034 /*
2035   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
2036   identify in the local ordering with periodic domain.
2037 */
2038 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[])
2039 {
2040   PetscErrorCode ierr;
2041   PetscInt       i,n;
2042 
2043   PetscFunctionBegin;
2044   ierr = ISLocalToGlobalMappingApplyBlock(ltog,1,row,row);CHKERRQ(ierr);
2045   ierr = ISLocalToGlobalMappingApplyBlock(ltog,*cnt,col,col);CHKERRQ(ierr);
2046   for (i=0,n=0; i<*cnt; i++) {
2047     if (col[i] >= *row) col[n++] = col[i];
2048   }
2049   *cnt = n;
2050   PetscFunctionReturn(0);
2051 }
2052 
2053 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J)
2054 {
2055   PetscErrorCode         ierr;
2056   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2057   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
2058   PetscInt               istart,iend,jstart,jend,ii,jj;
2059   MPI_Comm               comm;
2060   PetscScalar            *values;
2061   DMBoundaryType         bx,by;
2062   DMDAStencilType        st;
2063   ISLocalToGlobalMapping ltog;
2064 
2065   PetscFunctionBegin;
2066   /*
2067      nc - number of components per grid point
2068      col - number of colors needed in one direction for single component problem
2069   */
2070   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
2071   col  = 2*s + 1;
2072 
2073   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
2074   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
2075   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
2076 
2077   ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr);
2078 
2079   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
2080 
2081   /* determine the matrix preallocation information */
2082   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
2083   for (i=xs; i<xs+nx; i++) {
2084     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2085     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2086     for (j=ys; j<ys+ny; j++) {
2087       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2088       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2089       slot   = i - gxs + gnx*(j - gys);
2090 
2091       /* Find block columns in block row */
2092       cnt = 0;
2093       for (ii=istart; ii<iend+1; ii++) {
2094         for (jj=jstart; jj<jend+1; jj++) {
2095           if (st == DMDA_STENCIL_BOX || !ii || !jj) {
2096             cols[cnt++] = slot + ii + gnx*jj;
2097           }
2098         }
2099       }
2100       ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2101       ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
2102     }
2103   }
2104   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
2105   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
2106   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2107 
2108   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
2109 
2110   /*
2111     For each node in the grid: we get the neighbors in the local (on processor ordering
2112     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2113     PETSc ordering.
2114   */
2115   if (!da->prealloc_only) {
2116     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
2117     for (i=xs; i<xs+nx; i++) {
2118       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2119       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2120       for (j=ys; j<ys+ny; j++) {
2121         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2122         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2123         slot   = i - gxs + gnx*(j - gys);
2124 
2125         /* Find block columns in block row */
2126         cnt = 0;
2127         for (ii=istart; ii<iend+1; ii++) {
2128           for (jj=jstart; jj<jend+1; jj++) {
2129             if (st == DMDA_STENCIL_BOX || !ii || !jj) {
2130               cols[cnt++] = slot + ii + gnx*jj;
2131             }
2132           }
2133         }
2134         ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2135         ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2136       }
2137     }
2138     ierr = PetscFree(values);CHKERRQ(ierr);
2139     /* do not copy values to GPU since they are all zero and not yet needed there */
2140     ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
2141     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2142     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2143     ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
2144     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2145   }
2146   ierr = PetscFree(cols);CHKERRQ(ierr);
2147   PetscFunctionReturn(0);
2148 }
2149 
2150 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J)
2151 {
2152   PetscErrorCode         ierr;
2153   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2154   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
2155   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
2156   MPI_Comm               comm;
2157   PetscScalar            *values;
2158   DMBoundaryType         bx,by,bz;
2159   DMDAStencilType        st;
2160   ISLocalToGlobalMapping ltog;
2161 
2162   PetscFunctionBegin;
2163   /*
2164      nc - number of components per grid point
2165      col - number of colors needed in one direction for single component problem
2166   */
2167   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
2168   col  = 2*s + 1;
2169 
2170   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
2171   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
2172   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
2173 
2174   /* create the matrix */
2175   ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr);
2176 
2177   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
2178 
2179   /* determine the matrix preallocation information */
2180   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
2181   for (i=xs; i<xs+nx; i++) {
2182     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2183     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2184     for (j=ys; j<ys+ny; j++) {
2185       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2186       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2187       for (k=zs; k<zs+nz; k++) {
2188         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2189         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2190 
2191         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2192 
2193         /* Find block columns in block row */
2194         cnt = 0;
2195         for (ii=istart; ii<iend+1; ii++) {
2196           for (jj=jstart; jj<jend+1; jj++) {
2197             for (kk=kstart; kk<kend+1; kk++) {
2198               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
2199                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
2200               }
2201             }
2202           }
2203         }
2204         ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2205         ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
2206       }
2207     }
2208   }
2209   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
2210   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
2211   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2212 
2213   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
2214 
2215   /*
2216     For each node in the grid: we get the neighbors in the local (on processor ordering
2217     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2218     PETSc ordering.
2219   */
2220   if (!da->prealloc_only) {
2221     ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr);
2222     for (i=xs; i<xs+nx; i++) {
2223       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2224       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2225       for (j=ys; j<ys+ny; j++) {
2226         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2227         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2228         for (k=zs; k<zs+nz; k++) {
2229           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2230           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2231 
2232           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2233 
2234           cnt = 0;
2235           for (ii=istart; ii<iend+1; ii++) {
2236             for (jj=jstart; jj<jend+1; jj++) {
2237               for (kk=kstart; kk<kend+1; kk++) {
2238                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
2239                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
2240                 }
2241               }
2242             }
2243           }
2244           ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2245           ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2246         }
2247       }
2248     }
2249     ierr = PetscFree(values);CHKERRQ(ierr);
2250     /* do not copy values to GPU since they are all zero and not yet needed there */
2251     ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
2252     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2253     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2254     ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
2255     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2256   }
2257   ierr = PetscFree(cols);CHKERRQ(ierr);
2258   PetscFunctionReturn(0);
2259 }
2260 
2261 /* ---------------------------------------------------------------------------------*/
2262 
2263 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J)
2264 {
2265   PetscErrorCode         ierr;
2266   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2267   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt, maxcnt = 0,l,p,*dnz,*onz;
2268   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
2269   DM_DA                  *dd = (DM_DA*)da->data;
2270   PetscInt               ifill_col,*dfill = dd->dfill,*ofill = dd->ofill;
2271   MPI_Comm               comm;
2272   PetscScalar            *values;
2273   DMBoundaryType         bx,by,bz;
2274   ISLocalToGlobalMapping ltog;
2275   DMDAStencilType        st;
2276   PetscBool              removedups = PETSC_FALSE;
2277 
2278   PetscFunctionBegin;
2279   /*
2280          nc - number of components per grid point
2281          col - number of colors needed in one direction for single component problem
2282 
2283   */
2284   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
2285   col  = 2*s + 1;
2286   if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
2287                  by 2*stencil_width + 1\n");
2288   if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
2289                  by 2*stencil_width + 1\n");
2290   if (bz == DM_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
2291                  by 2*stencil_width + 1\n");
2292 
2293   /*
2294        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
2295        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
2296   */
2297   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
2298   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
2299   if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;
2300 
2301   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
2302   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
2303   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
2304 
2305   ierr = PetscMalloc1(col*col*col*nc,&cols);CHKERRQ(ierr);
2306   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
2307 
2308   /* determine the matrix preallocation information */
2309   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
2310 
2311   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
2312   for (i=xs; i<xs+nx; i++) {
2313     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2314     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2315     for (j=ys; j<ys+ny; j++) {
2316       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2317       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2318       for (k=zs; k<zs+nz; k++) {
2319         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2320         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2321 
2322         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2323 
2324         for (l=0; l<nc; l++) {
2325           cnt = 0;
2326           for (ii=istart; ii<iend+1; ii++) {
2327             for (jj=jstart; jj<jend+1; jj++) {
2328               for (kk=kstart; kk<kend+1; kk++) {
2329                 if (ii || jj || kk) {
2330                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2331                     for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2332                   }
2333                 } else {
2334                   if (dfill) {
2335                     for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2336                   } else {
2337                     for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2338                   }
2339                 }
2340               }
2341             }
2342           }
2343           row  = l + nc*(slot);
2344           maxcnt = PetscMax(maxcnt,cnt);
2345           if (removedups) {
2346             ierr = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
2347           } else {
2348             ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
2349           }
2350         }
2351       }
2352     }
2353   }
2354   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
2355   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
2356   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2357   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
2358 
2359   /*
2360     For each node in the grid: we get the neighbors in the local (on processor ordering
2361     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2362     PETSc ordering.
2363   */
2364   if (!da->prealloc_only) {
2365     ierr = PetscCalloc1(maxcnt,&values);CHKERRQ(ierr);
2366     for (i=xs; i<xs+nx; i++) {
2367       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2368       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2369       for (j=ys; j<ys+ny; j++) {
2370         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2371         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2372         for (k=zs; k<zs+nz; k++) {
2373           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2374           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2375 
2376           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2377 
2378           for (l=0; l<nc; l++) {
2379             cnt = 0;
2380             for (ii=istart; ii<iend+1; ii++) {
2381               for (jj=jstart; jj<jend+1; jj++) {
2382                 for (kk=kstart; kk<kend+1; kk++) {
2383                   if (ii || jj || kk) {
2384                     if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2385                       for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2386                     }
2387                   } else {
2388                     if (dfill) {
2389                       for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2390                     } else {
2391                       for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2392                     }
2393                   }
2394                 }
2395               }
2396             }
2397             row  = l + nc*(slot);
2398             ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2399           }
2400         }
2401       }
2402     }
2403     ierr = PetscFree(values);CHKERRQ(ierr);
2404     /* do not copy values to GPU since they are all zero and not yet needed there */
2405     ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
2406     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2407     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2408     ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
2409     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2410   }
2411   ierr = PetscFree(cols);CHKERRQ(ierr);
2412   PetscFunctionReturn(0);
2413 }
2414