xref: /petsc/src/dm/impls/da/fdda.c (revision bef158480efac06de457f7a665168877ab3c2fd7)
1 
2 #include <petsc/private/dmdaimpl.h> /*I      "petscdmda.h"     I*/
3 #include <petscmat.h>
4 
5 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring*);
6 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring*);
7 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring*);
8 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring*);
9 
10 /*
11    For ghost i that may be negative or greater than the upper bound this
12   maps it into the 0:m-1 range using periodicity
13 */
14 #define SetInRange(i,m) ((i < 0) ? m+i : ((i >= m) ? i-m : i))
15 
16 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill,PetscInt w,PetscInt **rfill)
17 {
18   PetscErrorCode ierr;
19   PetscInt       i,j,nz,*fill;
20 
21   PetscFunctionBegin;
22   if (!dfill) PetscFunctionReturn(0);
23 
24   /* count number nonzeros */
25   nz = 0;
26   for (i=0; i<w; i++) {
27     for (j=0; j<w; j++) {
28       if (dfill[w*i+j]) nz++;
29     }
30   }
31   ierr = PetscMalloc1(nz + w + 1,&fill);CHKERRQ(ierr);
32   /* construct modified CSR storage of nonzero structure */
33   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
34    so fill[1] - fill[0] gives number of nonzeros in first row etc */
35   nz = w + 1;
36   for (i=0; i<w; i++) {
37     fill[i] = nz;
38     for (j=0; j<w; j++) {
39       if (dfill[w*i+j]) {
40         fill[nz] = j;
41         nz++;
42       }
43     }
44   }
45   fill[w] = nz;
46 
47   *rfill = fill;
48   PetscFunctionReturn(0);
49 }
50 
51 
52 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse,PetscInt w,PetscInt **rfill)
53 {
54   PetscErrorCode ierr;
55   PetscInt       nz;
56 
57   PetscFunctionBegin;
58   if (!dfillsparse) PetscFunctionReturn(0);
59 
60   /* Determine number of non-zeros */
61   nz = (dfillsparse[w] - w - 1);
62 
63   /* Allocate space for our copy of the given sparse matrix representation. */
64   ierr = PetscMalloc1(nz + w + 1,rfill);CHKERRQ(ierr);
65   ierr = PetscArraycpy(*rfill,dfillsparse,nz+w+1);CHKERRQ(ierr);
66   PetscFunctionReturn(0);
67 }
68 
69 
70 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
71 {
72   PetscErrorCode ierr;
73   PetscInt       i,k,cnt = 1;
74 
75   PetscFunctionBegin;
76 
77   /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of
78    columns to the left with any nonzeros in them plus 1 */
79   ierr = PetscCalloc1(dd->w,&dd->ofillcols);CHKERRQ(ierr);
80   for (i=0; i<dd->w; i++) {
81     for (k=dd->ofill[i]; k<dd->ofill[i+1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
82   }
83   for (i=0; i<dd->w; i++) {
84     if (dd->ofillcols[i]) {
85       dd->ofillcols[i] = cnt++;
86     }
87   }
88   PetscFunctionReturn(0);
89 }
90 
91 
92 
93 /*@
94     DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
95     of the matrix returned by DMCreateMatrix().
96 
97     Logically Collective on da
98 
99     Input Parameter:
100 +   da - the distributed array
101 .   dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
102 -   ofill - the fill pattern in the off-diagonal blocks
103 
104 
105     Level: developer
106 
107     Notes:
108     This only makes sense when you are doing multicomponent problems but using the
109        MPIAIJ matrix format
110 
111            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
112        representing coupling and 0 entries for missing coupling. For example
113 $             dfill[9] = {1, 0, 0,
114 $                         1, 1, 0,
115 $                         0, 1, 1}
116        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
117        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
118        diagonal block).
119 
120      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
121      can be represented in the dfill, ofill format
122 
123    Contributed by Glenn Hammond
124 
125 .seealso DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()
126 
127 @*/
128 PetscErrorCode  DMDASetBlockFills(DM da,const PetscInt *dfill,const PetscInt *ofill)
129 {
130   DM_DA          *dd = (DM_DA*)da->data;
131   PetscErrorCode ierr;
132 
133   PetscFunctionBegin;
134   /* save the given dfill and ofill information */
135   ierr = DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);CHKERRQ(ierr);
136   ierr = DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);CHKERRQ(ierr);
137 
138   /* count nonzeros in ofill columns */
139   ierr = DMDASetBlockFills_Private2(dd);CHKERRQ(ierr);
140 
141   PetscFunctionReturn(0);
142 }
143 
144 
145 /*@
146     DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
147     of the matrix returned by DMCreateMatrix(), using sparse representations
148     of fill patterns.
149 
150     Logically Collective on da
151 
152     Input Parameter:
153 +   da - the distributed array
154 .   dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block)
155 -   ofill - the sparse fill pattern in the off-diagonal blocks
156 
157 
158     Level: developer
159 
160     Notes: This only makes sense when you are doing multicomponent problems but using the
161        MPIAIJ matrix format
162 
163            The format for dfill and ofill is a sparse representation of a
164            dof-by-dof matrix with 1 entries representing coupling and 0 entries
165            for missing coupling.  The sparse representation is a 1 dimensional
166            array of length nz + dof + 1, where nz is the number of non-zeros in
167            the matrix.  The first dof entries in the array give the
168            starting array indices of each row's items in the rest of the array,
169            the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
170            and the remaining nz items give the column indices of each of
171            the 1s within the logical 2D matrix.  Each row's items within
172            the array are the column indices of the 1s within that row
173            of the 2D matrix.  PETSc developers may recognize that this is the
174            same format as that computed by the DMDASetBlockFills_Private()
175            function from a dense 2D matrix representation.
176 
177      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
178      can be represented in the dfill, ofill format
179 
180    Contributed by Philip C. Roth
181 
182 .seealso DMDASetBlockFills(), DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()
183 
184 @*/
185 PetscErrorCode  DMDASetBlockFillsSparse(DM da,const PetscInt *dfillsparse,const PetscInt *ofillsparse)
186 {
187   DM_DA          *dd = (DM_DA*)da->data;
188   PetscErrorCode ierr;
189 
190   PetscFunctionBegin;
191   /* save the given dfill and ofill information */
192   ierr = DMDASetBlockFillsSparse_Private(dfillsparse,dd->w,&dd->dfill);CHKERRQ(ierr);
193   ierr = DMDASetBlockFillsSparse_Private(ofillsparse,dd->w,&dd->ofill);CHKERRQ(ierr);
194 
195   /* count nonzeros in ofill columns */
196   ierr = DMDASetBlockFills_Private2(dd);CHKERRQ(ierr);
197 
198   PetscFunctionReturn(0);
199 }
200 
201 
202 PetscErrorCode  DMCreateColoring_DA(DM da,ISColoringType ctype,ISColoring *coloring)
203 {
204   PetscErrorCode   ierr;
205   PetscInt         dim,m,n,p,nc;
206   DMBoundaryType   bx,by,bz;
207   MPI_Comm         comm;
208   PetscMPIInt      size;
209   PetscBool        isBAIJ;
210   DM_DA            *dd = (DM_DA*)da->data;
211 
212   PetscFunctionBegin;
213   /*
214                                   m
215           ------------------------------------------------------
216          |                                                     |
217          |                                                     |
218          |               ----------------------                |
219          |               |                    |                |
220       n  |           yn  |                    |                |
221          |               |                    |                |
222          |               .---------------------                |
223          |             (xs,ys)     xn                          |
224          |            .                                        |
225          |         (gxs,gys)                                   |
226          |                                                     |
227           -----------------------------------------------------
228   */
229 
230   /*
231          nc - number of components per grid point
232          col - number of colors needed in one direction for single component problem
233 
234   */
235   ierr = DMDAGetInfo(da,&dim,NULL,NULL,NULL,&m,&n,&p,&nc,NULL,&bx,&by,&bz,NULL);CHKERRQ(ierr);
236 
237   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
238   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
239   if (ctype == IS_COLORING_LOCAL) {
240     if (size == 1) {
241       ctype = IS_COLORING_GLOBAL;
242     } else if (dim > 1) {
243       if ((m==1 && bx == DM_BOUNDARY_PERIODIC) || (n==1 && by == DM_BOUNDARY_PERIODIC) || (p==1 && bz == DM_BOUNDARY_PERIODIC)) {
244         SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain  on the same process");
245       }
246     }
247   }
248 
249   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
250      matrices is for the blocks, not the individual matrix elements  */
251   ierr = PetscStrbeginswith(da->mattype,MATBAIJ,&isBAIJ);CHKERRQ(ierr);
252   if (!isBAIJ) {ierr = PetscStrbeginswith(da->mattype,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);}
253   if (!isBAIJ) {ierr = PetscStrbeginswith(da->mattype,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);}
254   if (isBAIJ) {
255     dd->w  = 1;
256     dd->xs = dd->xs/nc;
257     dd->xe = dd->xe/nc;
258     dd->Xs = dd->Xs/nc;
259     dd->Xe = dd->Xe/nc;
260   }
261 
262   /*
263      We do not provide a getcoloring function in the DMDA operations because
264    the basic DMDA does not know about matrices. We think of DMDA as being
265    more low-level then matrices.
266   */
267   if (dim == 1) {
268     ierr = DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
269   } else if (dim == 2) {
270     ierr = DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
271   } else if (dim == 3) {
272     ierr = DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
273   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
274   if (isBAIJ) {
275     dd->w  = nc;
276     dd->xs = dd->xs*nc;
277     dd->xe = dd->xe*nc;
278     dd->Xs = dd->Xs*nc;
279     dd->Xe = dd->Xe*nc;
280   }
281   PetscFunctionReturn(0);
282 }
283 
284 /* ---------------------------------------------------------------------------------*/
285 
286 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
287 {
288   PetscErrorCode  ierr;
289   PetscInt        xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
290   PetscInt        ncolors;
291   MPI_Comm        comm;
292   DMBoundaryType  bx,by;
293   DMDAStencilType st;
294   ISColoringValue *colors;
295   DM_DA           *dd = (DM_DA*)da->data;
296 
297   PetscFunctionBegin;
298   /*
299          nc - number of components per grid point
300          col - number of colors needed in one direction for single component problem
301 
302   */
303   ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,&M,&N,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
304   col  = 2*s + 1;
305   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
306   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
307   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
308 
309   /* special case as taught to us by Paul Hovland */
310   if (st == DMDA_STENCIL_STAR && s == 1) {
311     ierr = DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
312   } else {
313     if (ctype == IS_COLORING_GLOBAL) {
314       if (!dd->localcoloring) {
315         ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr);
316         ii   = 0;
317         for (j=ys; j<ys+ny; j++) {
318           for (i=xs; i<xs+nx; i++) {
319             for (k=0; k<nc; k++) {
320               colors[ii++] = k + nc*((i % col) + col*(j % col));
321             }
322           }
323         }
324         ncolors = nc + nc*(col-1 + col*(col-1));
325         ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
326       }
327       *coloring = dd->localcoloring;
328     } else if (ctype == IS_COLORING_LOCAL) {
329       if (!dd->ghostedcoloring) {
330         ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr);
331         ii   = 0;
332         for (j=gys; j<gys+gny; j++) {
333           for (i=gxs; i<gxs+gnx; i++) {
334             for (k=0; k<nc; k++) {
335               /* the complicated stuff is to handle periodic boundaries */
336               colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
337             }
338           }
339         }
340         ncolors = nc + nc*(col - 1 + col*(col-1));
341         ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
342         /* PetscIntView(ncolors,(PetscInt*)colors,0); */
343 
344         ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
345       }
346       *coloring = dd->ghostedcoloring;
347     } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
348   }
349   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
350   PetscFunctionReturn(0);
351 }
352 
353 /* ---------------------------------------------------------------------------------*/
354 
355 PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
356 {
357   PetscErrorCode  ierr;
358   PetscInt        xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
359   PetscInt        ncolors;
360   MPI_Comm        comm;
361   DMBoundaryType  bx,by,bz;
362   DMDAStencilType st;
363   ISColoringValue *colors;
364   DM_DA           *dd = (DM_DA*)da->data;
365 
366   PetscFunctionBegin;
367   /*
368          nc - number of components per grid point
369          col - number of colors needed in one direction for single component problem
370 
371   */
372   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
373   col  = 2*s + 1;
374   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
375   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
376   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
377 
378   /* create the coloring */
379   if (ctype == IS_COLORING_GLOBAL) {
380     if (!dd->localcoloring) {
381       ierr = PetscMalloc1(nc*nx*ny*nz,&colors);CHKERRQ(ierr);
382       ii   = 0;
383       for (k=zs; k<zs+nz; k++) {
384         for (j=ys; j<ys+ny; j++) {
385           for (i=xs; i<xs+nx; i++) {
386             for (l=0; l<nc; l++) {
387               colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
388             }
389           }
390         }
391       }
392       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
393       ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
394     }
395     *coloring = dd->localcoloring;
396   } else if (ctype == IS_COLORING_LOCAL) {
397     if (!dd->ghostedcoloring) {
398       ierr = PetscMalloc1(nc*gnx*gny*gnz,&colors);CHKERRQ(ierr);
399       ii   = 0;
400       for (k=gzs; k<gzs+gnz; k++) {
401         for (j=gys; j<gys+gny; j++) {
402           for (i=gxs; i<gxs+gnx; i++) {
403             for (l=0; l<nc; l++) {
404               /* the complicated stuff is to handle periodic boundaries */
405               colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
406             }
407           }
408         }
409       }
410       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
411       ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
412       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
413     }
414     *coloring = dd->ghostedcoloring;
415   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
416   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
417   PetscFunctionReturn(0);
418 }
419 
420 /* ---------------------------------------------------------------------------------*/
421 
422 PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
423 {
424   PetscErrorCode  ierr;
425   PetscInt        xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
426   PetscInt        ncolors;
427   MPI_Comm        comm;
428   DMBoundaryType  bx;
429   ISColoringValue *colors;
430   DM_DA           *dd = (DM_DA*)da->data;
431 
432   PetscFunctionBegin;
433   /*
434          nc - number of components per grid point
435          col - number of colors needed in one direction for single component problem
436 
437   */
438   ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,&M,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr);
439   col  = 2*s + 1;
440   ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr);
441   ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr);
442   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
443 
444   /* create the coloring */
445   if (ctype == IS_COLORING_GLOBAL) {
446     if (!dd->localcoloring) {
447       ierr = PetscMalloc1(nc*nx,&colors);CHKERRQ(ierr);
448       if (dd->ofillcols) {
449         PetscInt tc = 0;
450         for (i=0; i<nc; i++) tc += (PetscInt) (dd->ofillcols[i] > 0);
451         i1 = 0;
452         for (i=xs; i<xs+nx; i++) {
453           for (l=0; l<nc; l++) {
454             if (dd->ofillcols[l] && (i % col)) {
455               colors[i1++] =  nc - 1 + tc*((i % col) - 1) + dd->ofillcols[l];
456             } else {
457               colors[i1++] = l;
458             }
459           }
460         }
461         ncolors = nc + 2*s*tc;
462       } else {
463         i1 = 0;
464         for (i=xs; i<xs+nx; i++) {
465           for (l=0; l<nc; l++) {
466             colors[i1++] = l + nc*(i % col);
467           }
468         }
469         ncolors = nc + nc*(col-1);
470       }
471       ierr = ISColoringCreate(comm,ncolors,nc*nx,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
472     }
473     *coloring = dd->localcoloring;
474   } else if (ctype == IS_COLORING_LOCAL) {
475     if (!dd->ghostedcoloring) {
476       ierr = PetscMalloc1(nc*gnx,&colors);CHKERRQ(ierr);
477       i1   = 0;
478       for (i=gxs; i<gxs+gnx; i++) {
479         for (l=0; l<nc; l++) {
480           /* the complicated stuff is to handle periodic boundaries */
481           colors[i1++] = l + nc*(SetInRange(i,m) % col);
482         }
483       }
484       ncolors = nc + nc*(col-1);
485       ierr    = ISColoringCreate(comm,ncolors,nc*gnx,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
486       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
487     }
488     *coloring = dd->ghostedcoloring;
489   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
490   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
491   PetscFunctionReturn(0);
492 }
493 
494 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
495 {
496   PetscErrorCode  ierr;
497   PetscInt        xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
498   PetscInt        ncolors;
499   MPI_Comm        comm;
500   DMBoundaryType  bx,by;
501   ISColoringValue *colors;
502   DM_DA           *dd = (DM_DA*)da->data;
503 
504   PetscFunctionBegin;
505   /*
506          nc - number of components per grid point
507          col - number of colors needed in one direction for single component problem
508 
509   */
510   ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,NULL);CHKERRQ(ierr);
511   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
512   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
513   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
514   /* create the coloring */
515   if (ctype == IS_COLORING_GLOBAL) {
516     if (!dd->localcoloring) {
517       ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr);
518       ii   = 0;
519       for (j=ys; j<ys+ny; j++) {
520         for (i=xs; i<xs+nx; i++) {
521           for (k=0; k<nc; k++) {
522             colors[ii++] = k + nc*((3*j+i) % 5);
523           }
524         }
525       }
526       ncolors = 5*nc;
527       ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
528     }
529     *coloring = dd->localcoloring;
530   } else if (ctype == IS_COLORING_LOCAL) {
531     if (!dd->ghostedcoloring) {
532       ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr);
533       ii = 0;
534       for (j=gys; j<gys+gny; j++) {
535         for (i=gxs; i<gxs+gnx; i++) {
536           for (k=0; k<nc; k++) {
537             colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
538           }
539         }
540       }
541       ncolors = 5*nc;
542       ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
543       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
544     }
545     *coloring = dd->ghostedcoloring;
546   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
547   PetscFunctionReturn(0);
548 }
549 
550 /* =========================================================================== */
551 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat,PetscBool);
552 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM,Mat);
553 extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM,Mat,PetscBool);
554 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat,PetscBool);
555 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat);
556 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat,PetscBool);
557 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat);
558 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat);
559 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat);
560 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat);
561 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat);
562 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM,Mat);
563 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM,Mat);
564 extern PetscErrorCode DMCreateMatrix_DA_IS(DM,Mat);
565 
566 /*@C
567    MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix
568 
569    Logically Collective on mat
570 
571    Input Parameters:
572 +  mat - the matrix
573 -  da - the da
574 
575    Level: intermediate
576 
577 @*/
578 PetscErrorCode MatSetupDM(Mat mat,DM da)
579 {
580   PetscErrorCode ierr;
581 
582   PetscFunctionBegin;
583   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
584   PetscValidHeaderSpecificType(da,DM_CLASSID,1,DMDA);
585   ierr = PetscTryMethod(mat,"MatSetupDM_C",(Mat,DM),(mat,da));CHKERRQ(ierr);
586   PetscFunctionReturn(0);
587 }
588 
589 PetscErrorCode  MatView_MPI_DA(Mat A,PetscViewer viewer)
590 {
591   DM                da;
592   PetscErrorCode    ierr;
593   const char        *prefix;
594   Mat               Anatural;
595   AO                ao;
596   PetscInt          rstart,rend,*petsc,i;
597   IS                is;
598   MPI_Comm          comm;
599   PetscViewerFormat format;
600 
601   PetscFunctionBegin;
602   /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
603   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
604   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0);
605 
606   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
607   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
608   if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
609 
610   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
611   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
612   ierr = PetscMalloc1(rend-rstart,&petsc);CHKERRQ(ierr);
613   for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
614   ierr = AOApplicationToPetsc(ao,rend-rstart,petsc);CHKERRQ(ierr);
615   ierr = ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
616 
617   /* call viewer on natural ordering */
618   ierr = MatCreateSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);CHKERRQ(ierr);
619   ierr = ISDestroy(&is);CHKERRQ(ierr);
620   ierr = PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);CHKERRQ(ierr);
621   ierr = PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);CHKERRQ(ierr);
622   ierr = PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);CHKERRQ(ierr);
623   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
624   ierr = MatView(Anatural,viewer);CHKERRQ(ierr);
625   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
626   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
627   PetscFunctionReturn(0);
628 }
629 
630 PetscErrorCode  MatLoad_MPI_DA(Mat A,PetscViewer viewer)
631 {
632   DM             da;
633   PetscErrorCode ierr;
634   Mat            Anatural,Aapp;
635   AO             ao;
636   PetscInt       rstart,rend,*app,i,m,n,M,N;
637   IS             is;
638   MPI_Comm       comm;
639 
640   PetscFunctionBegin;
641   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
642   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
643   if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
644 
645   /* Load the matrix in natural ordering */
646   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Anatural);CHKERRQ(ierr);
647   ierr = MatSetType(Anatural,((PetscObject)A)->type_name);CHKERRQ(ierr);
648   ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr);
649   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
650   ierr = MatSetSizes(Anatural,m,n,M,N);CHKERRQ(ierr);
651   ierr = MatLoad(Anatural,viewer);CHKERRQ(ierr);
652 
653   /* Map natural ordering to application ordering and create IS */
654   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
655   ierr = MatGetOwnershipRange(Anatural,&rstart,&rend);CHKERRQ(ierr);
656   ierr = PetscMalloc1(rend-rstart,&app);CHKERRQ(ierr);
657   for (i=rstart; i<rend; i++) app[i-rstart] = i;
658   ierr = AOPetscToApplication(ao,rend-rstart,app);CHKERRQ(ierr);
659   ierr = ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
660 
661   /* Do permutation and replace header */
662   ierr = MatCreateSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);CHKERRQ(ierr);
663   ierr = MatHeaderReplace(A,&Aapp);CHKERRQ(ierr);
664   ierr = ISDestroy(&is);CHKERRQ(ierr);
665   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
666   PetscFunctionReturn(0);
667 }
668 
669 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
670 {
671   PetscErrorCode ierr;
672   PetscInt       dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
673   Mat            A;
674   MPI_Comm       comm;
675   MatType        Atype;
676   void           (*aij)(void)=NULL,(*baij)(void)=NULL,(*sbaij)(void)=NULL,(*sell)(void)=NULL,(*is)(void)=NULL;
677   MatType        mtype;
678   PetscMPIInt    size;
679   DM_DA          *dd = (DM_DA*)da->data;
680 
681   PetscFunctionBegin;
682   ierr = MatInitializePackage();CHKERRQ(ierr);
683   mtype = da->mattype;
684 
685   /*
686                                   m
687           ------------------------------------------------------
688          |                                                     |
689          |                                                     |
690          |               ----------------------                |
691          |               |                    |                |
692       n  |           ny  |                    |                |
693          |               |                    |                |
694          |               .---------------------                |
695          |             (xs,ys)     nx                          |
696          |            .                                        |
697          |         (gxs,gys)                                   |
698          |                                                     |
699           -----------------------------------------------------
700   */
701 
702   /*
703          nc - number of components per grid point
704          col - number of colors needed in one direction for single component problem
705 
706   */
707   M   = dd->M;
708   N   = dd->N;
709   P   = dd->P;
710   dim = da->dim;
711   dof = dd->w;
712   /* ierr = DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr); */
713   ierr = DMDAGetCorners(da,NULL,NULL,NULL,&nx,&ny,&nz);CHKERRQ(ierr);
714   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
715   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
716   ierr = MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);CHKERRQ(ierr);
717   ierr = MatSetType(A,mtype);CHKERRQ(ierr);
718   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
719   ierr = MatSetDM(A,da);CHKERRQ(ierr);
720   if (da->structure_only) {
721     ierr = MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE);CHKERRQ(ierr);
722   }
723   ierr = MatGetType(A,&Atype);CHKERRQ(ierr);
724   /*
725      We do not provide a getmatrix function in the DMDA operations because
726    the basic DMDA does not know about matrices. We think of DMDA as being more
727    more low-level than matrices. This is kind of cheating but, cause sometimes
728    we think of DMDA has higher level than matrices.
729 
730      We could switch based on Atype (or mtype), but we do not since the
731    specialized setting routines depend only on the particular preallocation
732    details of the matrix, not the type itself.
733   */
734   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
735   if (!aij) {
736     ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
737   }
738   if (!aij) {
739     ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
740     if (!baij) {
741       ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
742     }
743     if (!baij) {
744       ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
745       if (!sbaij) {
746         ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
747       }
748       if (!sbaij) {
749         ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISELLSetPreallocation_C",&sell);CHKERRQ(ierr);
750         if (!sell) {
751           ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",&sell);CHKERRQ(ierr);
752         }
753       }
754       if (!sell) {
755         ierr = PetscObjectQueryFunction((PetscObject)A,"MatISSetPreallocation_C",&is);CHKERRQ(ierr);
756       }
757     }
758   }
759   if (aij) {
760     if (dim == 1) {
761       if (dd->ofill) {
762         ierr = DMCreateMatrix_DA_1d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
763       } else {
764         DMBoundaryType bx;
765         PetscMPIInt  size;
766         ierr = DMDAGetInfo(da,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&bx,NULL,NULL,NULL);CHKERRQ(ierr);
767         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRQ(ierr);
768         if (size == 1 && bx == DM_BOUNDARY_NONE) {
769           ierr = DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da,A,PETSC_FALSE);CHKERRQ(ierr);
770         } else {
771           ierr = DMCreateMatrix_DA_1d_MPIAIJ(da,A,PETSC_FALSE);CHKERRQ(ierr);
772         }
773       }
774     } else if (dim == 2) {
775       if (dd->ofill) {
776         ierr = DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
777       } else {
778         ierr = DMCreateMatrix_DA_2d_MPIAIJ(da,A,PETSC_FALSE);CHKERRQ(ierr);
779       }
780     } else if (dim == 3) {
781       if (dd->ofill) {
782         ierr = DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
783       } else {
784         ierr = DMCreateMatrix_DA_3d_MPIAIJ(da,A,PETSC_FALSE);CHKERRQ(ierr);
785       }
786     }
787   } else if (baij) {
788     if (dim == 2) {
789       ierr = DMCreateMatrix_DA_2d_MPIBAIJ(da,A);CHKERRQ(ierr);
790     } else if (dim == 3) {
791       ierr = DMCreateMatrix_DA_3d_MPIBAIJ(da,A);CHKERRQ(ierr);
792     } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
793   } else if (sbaij) {
794     if (dim == 2) {
795       ierr = DMCreateMatrix_DA_2d_MPISBAIJ(da,A);CHKERRQ(ierr);
796     } else if (dim == 3) {
797       ierr = DMCreateMatrix_DA_3d_MPISBAIJ(da,A);CHKERRQ(ierr);
798     } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
799   } else if (sell) {
800      if (dim == 2) {
801        ierr = DMCreateMatrix_DA_2d_MPISELL(da,A);CHKERRQ(ierr);
802      } else if (dim == 3) {
803        ierr = DMCreateMatrix_DA_3d_MPISELL(da,A);CHKERRQ(ierr);
804      } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
805   } else if (is) {
806     ierr = DMCreateMatrix_DA_IS(da,A);CHKERRQ(ierr);
807   } else {
808     ISLocalToGlobalMapping ltog;
809 
810     ierr = MatSetBlockSize(A,dof);CHKERRQ(ierr);
811     ierr = MatSetUp(A);CHKERRQ(ierr);
812     ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
813     ierr = MatSetLocalToGlobalMapping(A,ltog,ltog);CHKERRQ(ierr);
814   }
815   ierr = DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);CHKERRQ(ierr);
816   ierr = MatSetStencil(A,dim,dims,starts,dof);CHKERRQ(ierr);
817   ierr = MatSetDM(A,da);CHKERRQ(ierr);
818   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
819   if (size > 1) {
820     /* change viewer to display matrix in natural ordering */
821     ierr = MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA);CHKERRQ(ierr);
822     ierr = MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA);CHKERRQ(ierr);
823   }
824   *J = A;
825   PetscFunctionReturn(0);
826 }
827 
828 /* ---------------------------------------------------------------------------------*/
829 PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
830 
831 PetscErrorCode DMCreateMatrix_DA_IS(DM dm,Mat J)
832 {
833   DM_DA                  *da = (DM_DA*)dm->data;
834   Mat                    lJ;
835   ISLocalToGlobalMapping ltog;
836   IS                     is_loc_filt, is_glob;
837   const PetscInt         *e_loc,*idx;
838   PetscInt               nel,nen,nv,dof,dim,*gidx,nb;
839   PetscBool              flg;
840   PetscErrorCode         ierr;
841 
842   /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
843      We need to filter the local indices that are represented through the DMDAGetElements decomposition
844      This is because the size of the local matrices in MATIS is the local size of the l2g map */
845   PetscFunctionBegin;
846   dof  = da->w;
847   dim  = dm->dim;
848 
849   ierr = MatSetBlockSize(J,dof);CHKERRQ(ierr);
850 
851   /* get local elements indices in local DMDA numbering */
852   ierr = DMDAGetElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
853   ierr = ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nel*nen,e_loc,PETSC_COPY_VALUES,&is_loc_filt);CHKERRQ(ierr);
854   ierr = DMDARestoreElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr);
855 
856   /* obtain a consistent local ordering for MATIS */
857   ierr = ISSortRemoveDups(is_loc_filt);CHKERRQ(ierr);
858   ierr = ISBlockGetLocalSize(is_loc_filt,&nb);CHKERRQ(ierr);
859   ierr = DMGetLocalToGlobalMapping(dm,&ltog);CHKERRQ(ierr);
860   ierr = ISLocalToGlobalMappingGetSize(ltog,&nv);CHKERRQ(ierr);
861   ierr = PetscMalloc1(PetscMax(nb,nv/dof),&gidx);CHKERRQ(ierr);
862   ierr = ISBlockGetIndices(is_loc_filt,&idx);CHKERRQ(ierr);
863   ierr = ISLocalToGlobalMappingApplyBlock(ltog,nb,idx,gidx);CHKERRQ(ierr);
864   ierr = ISBlockRestoreIndices(is_loc_filt,&idx);CHKERRQ(ierr);
865   ierr = ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nb,gidx,PETSC_USE_POINTER,&is_glob);CHKERRQ(ierr);
866   ierr = ISLocalToGlobalMappingCreateIS(is_glob,&ltog);CHKERRQ(ierr);
867   ierr = ISDestroy(&is_glob);CHKERRQ(ierr);
868   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
869   ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
870 
871   /* We also attach a l2g map to the local matrices to have MatSetValueLocal to work */
872   ierr = MatISGetLocalMat(J,&lJ);CHKERRQ(ierr);
873   ierr = ISLocalToGlobalMappingCreateIS(is_loc_filt,&ltog);CHKERRQ(ierr);
874   ierr = ISDestroy(&is_loc_filt);CHKERRQ(ierr);
875   ierr = ISCreateStride(PetscObjectComm((PetscObject)lJ),nv/dof,0,1,&is_glob);CHKERRQ(ierr);
876   ierr = ISGetIndices(is_glob,&idx);CHKERRQ(ierr);
877   ierr = ISGlobalToLocalMappingApplyBlock(ltog,IS_GTOLM_MASK,nv/dof,idx,&nb,gidx);CHKERRQ(ierr);
878   ierr = ISRestoreIndices(is_glob,&idx);CHKERRQ(ierr);
879   ierr = ISDestroy(&is_glob);CHKERRQ(ierr);
880   ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
881   ierr = ISCreateBlock(PETSC_COMM_SELF,dof,nb,gidx,PETSC_USE_POINTER,&is_loc_filt);CHKERRQ(ierr);
882   ierr = ISLocalToGlobalMappingCreateIS(is_loc_filt,&ltog);CHKERRQ(ierr);
883   ierr = ISDestroy(&is_loc_filt);CHKERRQ(ierr);
884   ierr = MatSetLocalToGlobalMapping(lJ,ltog,ltog);CHKERRQ(ierr);
885   ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
886   ierr = PetscFree(gidx);CHKERRQ(ierr);
887 
888   /* Preallocation (not exact): we reuse the preallocation routines of the assembled version  */
889   flg = dm->prealloc_only;
890   dm->prealloc_only = PETSC_TRUE;
891   switch (dim) {
892   case 1:
893     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr);
894     ierr = DMCreateMatrix_DA_1d_MPIAIJ(dm,J,PETSC_TRUE);CHKERRQ(ierr);
895     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
896     break;
897   case 2:
898     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr);
899     ierr = DMCreateMatrix_DA_2d_MPIAIJ(dm,J,PETSC_TRUE);CHKERRQ(ierr);
900     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
901     break;
902   case 3:
903     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr);
904     ierr = DMCreateMatrix_DA_3d_MPIAIJ(dm,J,PETSC_TRUE);CHKERRQ(ierr);
905     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
906     break;
907   default:
908     SETERRQ1(PetscObjectComm((PetscObject)dm),PETSC_ERR_SUP,"Unhandled dimension %d",dim);
909     break;
910   }
911   dm->prealloc_only = flg;
912   PetscFunctionReturn(0);
913 }
914 
915 PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da,Mat J)
916 {
917   PetscErrorCode         ierr;
918   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p;
919   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
920   MPI_Comm               comm;
921   PetscScalar            *values;
922   DMBoundaryType         bx,by;
923   ISLocalToGlobalMapping ltog;
924   DMDAStencilType        st;
925 
926   PetscFunctionBegin;
927   /*
928          nc - number of components per grid point
929          col - number of colors needed in one direction for single component problem
930 
931   */
932   ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
933   col  = 2*s + 1;
934   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
935   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
936   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
937 
938   ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr);
939   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
940 
941   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
942   /* determine the matrix preallocation information */
943   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
944   for (i=xs; i<xs+nx; i++) {
945 
946     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
947     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
948 
949     for (j=ys; j<ys+ny; j++) {
950       slot = i - gxs + gnx*(j - gys);
951 
952       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
953       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
954 
955       cnt = 0;
956       for (k=0; k<nc; k++) {
957         for (l=lstart; l<lend+1; l++) {
958           for (p=pstart; p<pend+1; p++) {
959             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
960               cols[cnt++] = k + nc*(slot + gnx*l + p);
961             }
962           }
963         }
964         rows[k] = k + nc*(slot);
965       }
966       ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
967     }
968   }
969   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
970   ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr);
971   ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
972   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
973 
974   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
975 
976   /*
977     For each node in the grid: we get the neighbors in the local (on processor ordering
978     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
979     PETSc ordering.
980   */
981   if (!da->prealloc_only) {
982     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
983     for (i=xs; i<xs+nx; i++) {
984 
985       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
986       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
987 
988       for (j=ys; j<ys+ny; j++) {
989         slot = i - gxs + gnx*(j - gys);
990 
991         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
992         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
993 
994         cnt = 0;
995         for (k=0; k<nc; k++) {
996           for (l=lstart; l<lend+1; l++) {
997             for (p=pstart; p<pend+1; p++) {
998               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
999                 cols[cnt++] = k + nc*(slot + gnx*l + p);
1000               }
1001             }
1002           }
1003           rows[k] = k + nc*(slot);
1004         }
1005         ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1006       }
1007     }
1008     ierr = PetscFree(values);CHKERRQ(ierr);
1009     /* do not copy values to GPU since they are all zero and not yet needed there */
1010     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1011     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1012     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1013     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1014     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1015   }
1016   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1017   PetscFunctionReturn(0);
1018 }
1019 
1020 PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da,Mat J)
1021 {
1022   PetscErrorCode         ierr;
1023   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1024   PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
1025   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
1026   MPI_Comm               comm;
1027   PetscScalar            *values;
1028   DMBoundaryType         bx,by,bz;
1029   ISLocalToGlobalMapping ltog;
1030   DMDAStencilType        st;
1031 
1032   PetscFunctionBegin;
1033   /*
1034          nc - number of components per grid point
1035          col - number of colors needed in one direction for single component problem
1036 
1037   */
1038   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1039   col  = 2*s + 1;
1040   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1041   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1042   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1043 
1044   ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr);
1045   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1046 
1047   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1048   /* determine the matrix preallocation information */
1049   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1050   for (i=xs; i<xs+nx; i++) {
1051     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1052     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1053     for (j=ys; j<ys+ny; j++) {
1054       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1055       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1056       for (k=zs; k<zs+nz; k++) {
1057         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1058         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1059 
1060         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1061 
1062         cnt = 0;
1063         for (l=0; l<nc; l++) {
1064           for (ii=istart; ii<iend+1; ii++) {
1065             for (jj=jstart; jj<jend+1; jj++) {
1066               for (kk=kstart; kk<kend+1; kk++) {
1067                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1068                   cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1069                 }
1070               }
1071             }
1072           }
1073           rows[l] = l + nc*(slot);
1074         }
1075         ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1076       }
1077     }
1078   }
1079   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1080   ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1081   ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1082   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1083   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1084 
1085   /*
1086     For each node in the grid: we get the neighbors in the local (on processor ordering
1087     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1088     PETSc ordering.
1089   */
1090   if (!da->prealloc_only) {
1091     ierr = PetscCalloc1(col*col*col*nc*nc*nc,&values);CHKERRQ(ierr);
1092     for (i=xs; i<xs+nx; i++) {
1093       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1094       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1095       for (j=ys; j<ys+ny; j++) {
1096         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1097         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1098         for (k=zs; k<zs+nz; k++) {
1099           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1100           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1101 
1102           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1103 
1104           cnt = 0;
1105           for (l=0; l<nc; l++) {
1106             for (ii=istart; ii<iend+1; ii++) {
1107               for (jj=jstart; jj<jend+1; jj++) {
1108                 for (kk=kstart; kk<kend+1; kk++) {
1109                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1110                     cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1111                   }
1112                 }
1113               }
1114             }
1115             rows[l] = l + nc*(slot);
1116           }
1117           ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1118         }
1119       }
1120     }
1121     ierr = PetscFree(values);CHKERRQ(ierr);
1122     /* do not copy values to GPU since they are all zero and not yet needed there */
1123     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1124     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1125     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1126     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1127     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1128   }
1129   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1130   PetscFunctionReturn(0);
1131 }
1132 
1133 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J,PetscBool isIS)
1134 {
1135   PetscErrorCode         ierr;
1136   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,M,N;
1137   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
1138   MPI_Comm               comm;
1139   DMBoundaryType         bx,by;
1140   ISLocalToGlobalMapping ltog,mltog;
1141   DMDAStencilType        st;
1142   PetscBool              removedups = PETSC_FALSE;
1143 
1144   PetscFunctionBegin;
1145   /*
1146          nc - number of components per grid point
1147          col - number of colors needed in one direction for single component problem
1148 
1149   */
1150   ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
1151   if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) {
1152     ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr);
1153   }
1154   col  = 2*s + 1;
1155   /*
1156        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1157        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1158   */
1159   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1160   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1161   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
1162   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
1163   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1164 
1165   ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr);
1166   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1167 
1168   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1169   /* determine the matrix preallocation information */
1170   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
1171   for (i=xs; i<xs+nx; i++) {
1172 
1173     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1174     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1175 
1176     for (j=ys; j<ys+ny; j++) {
1177       slot = i - gxs + gnx*(j - gys);
1178 
1179       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1180       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1181 
1182       cnt = 0;
1183       for (k=0; k<nc; k++) {
1184         for (l=lstart; l<lend+1; l++) {
1185           for (p=pstart; p<pend+1; p++) {
1186             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1187               cols[cnt++] = k + nc*(slot + gnx*l + p);
1188             }
1189           }
1190         }
1191         rows[k] = k + nc*(slot);
1192       }
1193       if (removedups) {
1194         ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1195       } else {
1196         ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1197       }
1198     }
1199   }
1200   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1201   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1202   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1203   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1204   ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
1205   if (!mltog) {
1206     ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1207   }
1208 
1209   /*
1210     For each node in the grid: we get the neighbors in the local (on processor ordering
1211     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1212     PETSc ordering.
1213   */
1214   if (!da->prealloc_only) {
1215     for (i=xs; i<xs+nx; i++) {
1216 
1217       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1218       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1219 
1220       for (j=ys; j<ys+ny; j++) {
1221         slot = i - gxs + gnx*(j - gys);
1222 
1223         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1224         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1225 
1226         cnt = 0;
1227         for (l=lstart; l<lend+1; l++) {
1228           for (p=pstart; p<pend+1; p++) {
1229             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1230               cols[cnt++] = nc*(slot + gnx*l + p);
1231               for (k=1; k<nc; k++) {
1232                 cols[cnt] = 1 + cols[cnt-1];cnt++;
1233               }
1234             }
1235           }
1236         }
1237         for (k=0; k<nc; k++) rows[k] = k + nc*(slot);
1238         ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1239       }
1240     }
1241     /* do not copy values to GPU since they are all zero and not yet needed there */
1242     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1243     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1244     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1245     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1246     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1247     if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) {
1248       ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
1249     }
1250   }
1251   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1252   PetscFunctionReturn(0);
1253 }
1254 
1255 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)
1256 {
1257   PetscErrorCode         ierr;
1258   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1259   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,maxcnt = 0,l,p,M,N;
1260   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
1261   DM_DA                  *dd = (DM_DA*)da->data;
1262   PetscInt               ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
1263   MPI_Comm               comm;
1264   DMBoundaryType         bx,by;
1265   ISLocalToGlobalMapping ltog;
1266   DMDAStencilType        st;
1267   PetscBool              removedups = PETSC_FALSE;
1268 
1269   PetscFunctionBegin;
1270   /*
1271          nc - number of components per grid point
1272          col - number of colors needed in one direction for single component problem
1273 
1274   */
1275   ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
1276   col  = 2*s + 1;
1277   /*
1278        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1279        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1280   */
1281   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1282   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1283   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
1284   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
1285   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1286 
1287   ierr = PetscMalloc1(col*col*nc,&cols);CHKERRQ(ierr);
1288   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1289 
1290   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1291   /* determine the matrix preallocation information */
1292   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
1293   for (i=xs; i<xs+nx; i++) {
1294 
1295     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1296     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1297 
1298     for (j=ys; j<ys+ny; j++) {
1299       slot = i - gxs + gnx*(j - gys);
1300 
1301       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1302       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1303 
1304       for (k=0; k<nc; k++) {
1305         cnt = 0;
1306         for (l=lstart; l<lend+1; l++) {
1307           for (p=pstart; p<pend+1; p++) {
1308             if (l || p) {
1309               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
1310                 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
1311               }
1312             } else {
1313               if (dfill) {
1314                 for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1315               } else {
1316                 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1317               }
1318             }
1319           }
1320         }
1321         row    = k + nc*(slot);
1322         maxcnt = PetscMax(maxcnt,cnt);
1323         if (removedups) {
1324           ierr   = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1325         } else {
1326           ierr   = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1327         }
1328       }
1329     }
1330   }
1331   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1332   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1333   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1334   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1335 
1336   /*
1337     For each node in the grid: we get the neighbors in the local (on processor ordering
1338     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1339     PETSc ordering.
1340   */
1341   if (!da->prealloc_only) {
1342     for (i=xs; i<xs+nx; i++) {
1343 
1344       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1345       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1346 
1347       for (j=ys; j<ys+ny; j++) {
1348         slot = i - gxs + gnx*(j - gys);
1349 
1350         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1351         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1352 
1353         for (k=0; k<nc; k++) {
1354           cnt = 0;
1355           for (l=lstart; l<lend+1; l++) {
1356             for (p=pstart; p<pend+1; p++) {
1357               if (l || p) {
1358                 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
1359                   for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
1360                 }
1361               } else {
1362                 if (dfill) {
1363                   for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1364                 } else {
1365                   for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1366                 }
1367               }
1368             }
1369           }
1370           row  = k + nc*(slot);
1371           ierr = MatSetValuesLocal(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1372         }
1373       }
1374     }
1375     /* do not copy values to GPU since they are all zero and not yet needed there */
1376     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1377     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1378     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1379     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1380     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1381   }
1382   ierr = PetscFree(cols);CHKERRQ(ierr);
1383   PetscFunctionReturn(0);
1384 }
1385 
1386 /* ---------------------------------------------------------------------------------*/
1387 
1388 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J,PetscBool isIS)
1389 {
1390   PetscErrorCode         ierr;
1391   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1392   PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
1393   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
1394   MPI_Comm               comm;
1395   DMBoundaryType         bx,by,bz;
1396   ISLocalToGlobalMapping ltog,mltog;
1397   DMDAStencilType        st;
1398   PetscBool              removedups = PETSC_FALSE;
1399 
1400   PetscFunctionBegin;
1401   /*
1402          nc - number of components per grid point
1403          col - number of colors needed in one direction for single component problem
1404 
1405   */
1406   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1407   if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) {
1408     ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr);
1409   }
1410   col  = 2*s + 1;
1411 
1412   /*
1413        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1414        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1415   */
1416   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1417   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1418   if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;
1419 
1420   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1421   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1422   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1423 
1424   ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr);
1425   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1426 
1427   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1428   /* determine the matrix preallocation information */
1429   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1430   for (i=xs; i<xs+nx; i++) {
1431     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1432     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1433     for (j=ys; j<ys+ny; j++) {
1434       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1435       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1436       for (k=zs; k<zs+nz; k++) {
1437         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1438         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1439 
1440         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1441 
1442         cnt = 0;
1443         for (l=0; l<nc; l++) {
1444           for (ii=istart; ii<iend+1; ii++) {
1445             for (jj=jstart; jj<jend+1; jj++) {
1446               for (kk=kstart; kk<kend+1; kk++) {
1447                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1448                   cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1449                 }
1450               }
1451             }
1452           }
1453           rows[l] = l + nc*(slot);
1454         }
1455         if (removedups) {
1456           ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1457         } else {
1458           ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1459         }
1460       }
1461     }
1462   }
1463   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1464   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1465   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1466   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1467   ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
1468   if (!mltog) {
1469     ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1470   }
1471 
1472   /*
1473     For each node in the grid: we get the neighbors in the local (on processor ordering
1474     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1475     PETSc ordering.
1476   */
1477   if (!da->prealloc_only) {
1478     for (i=xs; i<xs+nx; i++) {
1479       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1480       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1481       for (j=ys; j<ys+ny; j++) {
1482         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1483         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1484         for (k=zs; k<zs+nz; k++) {
1485           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1486           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1487 
1488           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1489 
1490           cnt = 0;
1491           for (kk=kstart; kk<kend+1; kk++) {
1492             for (jj=jstart; jj<jend+1; jj++) {
1493               for (ii=istart; ii<iend+1; ii++) {
1494                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1495                   cols[cnt++] = nc*(slot + ii + gnx*jj + gnx*gny*kk);
1496                     for (l=1; l<nc; l++) {
1497                       cols[cnt] = 1 + cols[cnt-1];cnt++;
1498                   }
1499                 }
1500               }
1501             }
1502           }
1503           rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
1504           ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1505         }
1506       }
1507     }
1508     /* do not copy values to GPU since they are all zero and not yet needed there */
1509     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1510     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1511     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1512     if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) {
1513       ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
1514     }
1515     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1516     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1517   }
1518   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1519   PetscFunctionReturn(0);
1520 }
1521 
1522 /* ---------------------------------------------------------------------------------*/
1523 
1524 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J)
1525 {
1526   PetscErrorCode         ierr;
1527   DM_DA                  *dd = (DM_DA*)da->data;
1528   PetscInt               xs,nx,i,j,gxs,gnx,row,k,l;
1529   PetscInt               m,dim,s,*cols = NULL,nc,cnt,maxcnt = 0,*ocols;
1530   PetscInt               *ofill = dd->ofill,*dfill = dd->dfill;
1531   DMBoundaryType         bx;
1532   ISLocalToGlobalMapping ltog;
1533   PetscMPIInt            rank,size;
1534 
1535   PetscFunctionBegin;
1536   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)da),&rank);CHKERRQ(ierr);
1537   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRQ(ierr);
1538 
1539   /*
1540          nc - number of components per grid point
1541 
1542   */
1543   ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr);
1544   if (s > 1) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Matrix creation for 1d not implemented correctly for stencil width larger than 1");
1545   ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr);
1546   ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr);
1547 
1548   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1549   ierr = PetscCalloc2(nx*nc,&cols,nx*nc,&ocols);CHKERRQ(ierr);
1550 
1551   /*
1552         note should be smaller for first and last process with no periodic
1553         does not handle dfill
1554   */
1555   cnt = 0;
1556   /* coupling with process to the left */
1557   for (i=0; i<s; i++) {
1558     for (j=0; j<nc; j++) {
1559       ocols[cnt] = ((!rank) ? 0 : (s - i)*(ofill[j+1] - ofill[j]));
1560       cols[cnt]  = dfill[j+1] - dfill[j] + (s + i)*(ofill[j+1] - ofill[j]);
1561       if (!rank && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1562         if (size > 1) ocols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
1563         else cols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
1564       }
1565       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1566       cnt++;
1567     }
1568   }
1569   for (i=s; i<nx-s; i++) {
1570     for (j=0; j<nc; j++) {
1571       cols[cnt] = dfill[j+1] - dfill[j] + 2*s*(ofill[j+1] - ofill[j]);
1572       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1573       cnt++;
1574     }
1575   }
1576   /* coupling with process to the right */
1577   for (i=nx-s; i<nx; i++) {
1578     for (j=0; j<nc; j++) {
1579       ocols[cnt] = ((rank == (size-1)) ? 0 : (i - nx + s + 1)*(ofill[j+1] - ofill[j]));
1580       cols[cnt]  = dfill[j+1] - dfill[j] + (s + nx - i - 1)*(ofill[j+1] - ofill[j]);
1581       if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1582         if (size > 1) ocols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
1583         else cols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
1584       }
1585       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1586       cnt++;
1587     }
1588   }
1589 
1590   ierr = MatSeqAIJSetPreallocation(J,0,cols);CHKERRQ(ierr);
1591   ierr = MatMPIAIJSetPreallocation(J,0,cols,0,ocols);CHKERRQ(ierr);
1592   ierr = PetscFree2(cols,ocols);CHKERRQ(ierr);
1593 
1594   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1595   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1596 
1597   /*
1598     For each node in the grid: we get the neighbors in the local (on processor ordering
1599     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1600     PETSc ordering.
1601   */
1602   if (!da->prealloc_only) {
1603     ierr = PetscMalloc1(maxcnt,&cols);CHKERRQ(ierr);
1604     row = xs*nc;
1605     /* coupling with process to the left */
1606     for (i=xs; i<xs+s; i++) {
1607       for (j=0; j<nc; j++) {
1608         cnt = 0;
1609         if (rank) {
1610           for (l=0; l<s; l++) {
1611             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1612           }
1613         }
1614         if (!rank && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1615           for (l=0; l<s; l++) {
1616             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (m + i - s - l)*nc + ofill[k];
1617           }
1618         }
1619         if (dfill) {
1620           for (k=dfill[j]; k<dfill[j+1]; k++) {
1621             cols[cnt++] = i*nc + dfill[k];
1622           }
1623         } else {
1624           for (k=0; k<nc; k++) {
1625             cols[cnt++] = i*nc + k;
1626           }
1627         }
1628         for (l=0; l<s; l++) {
1629           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1630         }
1631         ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1632         row++;
1633       }
1634     }
1635     for (i=xs+s; i<xs+nx-s; i++) {
1636       for (j=0; j<nc; j++) {
1637         cnt = 0;
1638         for (l=0; l<s; l++) {
1639           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1640         }
1641         if (dfill) {
1642           for (k=dfill[j]; k<dfill[j+1]; k++) {
1643             cols[cnt++] = i*nc + dfill[k];
1644           }
1645         } else {
1646           for (k=0; k<nc; k++) {
1647             cols[cnt++] = i*nc + k;
1648           }
1649         }
1650         for (l=0; l<s; l++) {
1651           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1652         }
1653         ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1654         row++;
1655       }
1656     }
1657     /* coupling with process to the right */
1658     for (i=xs+nx-s; i<xs+nx; i++) {
1659       for (j=0; j<nc; j++) {
1660         cnt = 0;
1661         for (l=0; l<s; l++) {
1662           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1663         }
1664         if (dfill) {
1665           for (k=dfill[j]; k<dfill[j+1]; k++) {
1666             cols[cnt++] = i*nc + dfill[k];
1667           }
1668         } else {
1669           for (k=0; k<nc; k++) {
1670             cols[cnt++] = i*nc + k;
1671           }
1672         }
1673         if (rank < size-1) {
1674           for (l=0; l<s; l++) {
1675             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1676           }
1677         }
1678         if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1679           for (l=0; l<s; l++) {
1680             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s - l - m + 2)*nc + ofill[k];
1681           }
1682         }
1683         ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1684         row++;
1685       }
1686     }
1687     ierr = PetscFree(cols);CHKERRQ(ierr);
1688     /* do not copy values to GPU since they are all zero and not yet needed there */
1689     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1690     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1691     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1692     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1693     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1694   }
1695   PetscFunctionReturn(0);
1696 }
1697 
1698 /* ---------------------------------------------------------------------------------*/
1699 
1700 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J,PetscBool isIS)
1701 {
1702   PetscErrorCode         ierr;
1703   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1704   PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
1705   PetscInt               istart,iend;
1706   DMBoundaryType         bx;
1707   ISLocalToGlobalMapping ltog,mltog;
1708 
1709   PetscFunctionBegin;
1710   /*
1711          nc - number of components per grid point
1712          col - number of colors needed in one direction for single component problem
1713 
1714   */
1715   ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr);
1716   if (!isIS && bx == DM_BOUNDARY_NONE) {
1717     ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr);
1718   }
1719   col  = 2*s + 1;
1720 
1721   ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr);
1722   ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr);
1723 
1724   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1725   ierr = MatSeqAIJSetPreallocation(J,col*nc,NULL);CHKERRQ(ierr);
1726   ierr = MatMPIAIJSetPreallocation(J,col*nc,NULL,col*nc,NULL);CHKERRQ(ierr);
1727 
1728   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1729   ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
1730   if (!mltog) {
1731     ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1732   }
1733 
1734   /*
1735     For each node in the grid: we get the neighbors in the local (on processor ordering
1736     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1737     PETSc ordering.
1738   */
1739   if (!da->prealloc_only) {
1740     ierr = PetscMalloc2(nc,&rows,col*nc*nc,&cols);CHKERRQ(ierr);
1741     for (i=xs; i<xs+nx; i++) {
1742       istart = PetscMax(-s,gxs - i);
1743       iend   = PetscMin(s,gxs + gnx - i - 1);
1744       slot   = i - gxs;
1745 
1746       cnt = 0;
1747       for (i1=istart; i1<iend+1; i1++) {
1748         cols[cnt++] = nc*(slot + i1);
1749         for (l=1; l<nc; l++) {
1750           cols[cnt] = 1 + cols[cnt-1];cnt++;
1751         }
1752       }
1753       rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
1754       ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1755     }
1756     /* do not copy values to GPU since they are all zero and not yet needed there */
1757     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1758     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1759     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1760     if (!isIS && bx == DM_BOUNDARY_NONE) {
1761       ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
1762     }
1763     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1764     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1765     ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1766   }
1767   PetscFunctionReturn(0);
1768 }
1769 
1770 /* ---------------------------------------------------------------------------------*/
1771 
1772 PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da,Mat J,PetscBool isIS)
1773 {
1774   PetscErrorCode         ierr;
1775   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1776   PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
1777   PetscInt               istart,iend;
1778   DMBoundaryType         bx;
1779   ISLocalToGlobalMapping ltog,mltog;
1780 
1781   PetscFunctionBegin;
1782   /*
1783          nc - number of components per grid point
1784          col - number of colors needed in one direction for single component problem
1785   */
1786   ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr);
1787   col  = 2*s + 1;
1788 
1789   ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr);
1790   ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr);
1791 
1792   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1793   ierr = MatSeqAIJSetTotalPreallocation(J,nx*nc*col*nc);CHKERRQ(ierr);
1794 
1795   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1796   ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
1797   if (!mltog) {
1798     ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1799   }
1800 
1801   /*
1802     For each node in the grid: we get the neighbors in the local (on processor ordering
1803     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1804     PETSc ordering.
1805   */
1806   if (!da->prealloc_only) {
1807     ierr = PetscMalloc2(nc,&rows,col*nc*nc,&cols);CHKERRQ(ierr);
1808     for (i=xs; i<xs+nx; i++) {
1809       istart = PetscMax(-s,gxs - i);
1810       iend   = PetscMin(s,gxs + gnx - i - 1);
1811       slot   = i - gxs;
1812 
1813       cnt = 0;
1814       for (i1=istart; i1<iend+1; i1++) {
1815         cols[cnt++] = nc*(slot + i1);
1816         for (l=1; l<nc; l++) {
1817           cols[cnt] = 1 + cols[cnt-1];cnt++;
1818         }
1819       }
1820       rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
1821       ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1822     }
1823     /* do not copy values to GPU since they are all zero and not yet needed there */
1824     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1825     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1826     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1827     if (!isIS && bx == DM_BOUNDARY_NONE) {
1828       ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
1829     }
1830     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1831     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1832     ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1833   }
1834   ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
1835   PetscFunctionReturn(0);
1836 }
1837 
1838 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
1839 {
1840   PetscErrorCode         ierr;
1841   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1842   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1843   PetscInt               istart,iend,jstart,jend,ii,jj;
1844   MPI_Comm               comm;
1845   PetscScalar            *values;
1846   DMBoundaryType         bx,by;
1847   DMDAStencilType        st;
1848   ISLocalToGlobalMapping ltog;
1849 
1850   PetscFunctionBegin;
1851   /*
1852      nc - number of components per grid point
1853      col - number of colors needed in one direction for single component problem
1854   */
1855   ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
1856   col  = 2*s + 1;
1857 
1858   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
1859   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
1860   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1861 
1862   ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr);
1863 
1864   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1865 
1866   /* determine the matrix preallocation information */
1867   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1868   for (i=xs; i<xs+nx; i++) {
1869     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1870     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1871     for (j=ys; j<ys+ny; j++) {
1872       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1873       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1874       slot   = i - gxs + gnx*(j - gys);
1875 
1876       /* Find block columns in block row */
1877       cnt = 0;
1878       for (ii=istart; ii<iend+1; ii++) {
1879         for (jj=jstart; jj<jend+1; jj++) {
1880           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1881             cols[cnt++] = slot + ii + gnx*jj;
1882           }
1883         }
1884       }
1885       ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1886     }
1887   }
1888   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1889   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1890   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1891 
1892   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1893 
1894   /*
1895     For each node in the grid: we get the neighbors in the local (on processor ordering
1896     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1897     PETSc ordering.
1898   */
1899   if (!da->prealloc_only) {
1900     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
1901     for (i=xs; i<xs+nx; i++) {
1902       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1903       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1904       for (j=ys; j<ys+ny; j++) {
1905         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1906         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1907         slot = i - gxs + gnx*(j - gys);
1908         cnt  = 0;
1909         for (ii=istart; ii<iend+1; ii++) {
1910           for (jj=jstart; jj<jend+1; jj++) {
1911             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1912               cols[cnt++] = slot + ii + gnx*jj;
1913             }
1914           }
1915         }
1916         ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1917       }
1918     }
1919     ierr = PetscFree(values);CHKERRQ(ierr);
1920     /* do not copy values to GPU since they are all zero and not yet needed there */
1921     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1922     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1923     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1924     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1925     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1926   }
1927   ierr = PetscFree(cols);CHKERRQ(ierr);
1928   PetscFunctionReturn(0);
1929 }
1930 
1931 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
1932 {
1933   PetscErrorCode         ierr;
1934   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1935   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1936   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1937   MPI_Comm               comm;
1938   PetscScalar            *values;
1939   DMBoundaryType         bx,by,bz;
1940   DMDAStencilType        st;
1941   ISLocalToGlobalMapping ltog;
1942 
1943   PetscFunctionBegin;
1944   /*
1945          nc - number of components per grid point
1946          col - number of colors needed in one direction for single component problem
1947 
1948   */
1949   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1950   col  = 2*s + 1;
1951 
1952   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1953   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1954   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1955 
1956   ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr);
1957 
1958   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1959 
1960   /* determine the matrix preallocation information */
1961   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1962   for (i=xs; i<xs+nx; i++) {
1963     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1964     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1965     for (j=ys; j<ys+ny; j++) {
1966       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1967       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1968       for (k=zs; k<zs+nz; k++) {
1969         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1970         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1971 
1972         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1973 
1974         /* Find block columns in block row */
1975         cnt = 0;
1976         for (ii=istart; ii<iend+1; ii++) {
1977           for (jj=jstart; jj<jend+1; jj++) {
1978             for (kk=kstart; kk<kend+1; kk++) {
1979               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1980                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1981               }
1982             }
1983           }
1984         }
1985         ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1986       }
1987     }
1988   }
1989   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1990   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1991   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1992 
1993   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1994 
1995   /*
1996     For each node in the grid: we get the neighbors in the local (on processor ordering
1997     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1998     PETSc ordering.
1999   */
2000   if (!da->prealloc_only) {
2001     ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr);
2002     for (i=xs; i<xs+nx; i++) {
2003       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2004       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2005       for (j=ys; j<ys+ny; j++) {
2006         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2007         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2008         for (k=zs; k<zs+nz; k++) {
2009           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2010           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2011 
2012           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2013 
2014           cnt = 0;
2015           for (ii=istart; ii<iend+1; ii++) {
2016             for (jj=jstart; jj<jend+1; jj++) {
2017               for (kk=kstart; kk<kend+1; kk++) {
2018                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2019                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
2020                 }
2021               }
2022             }
2023           }
2024           ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2025         }
2026       }
2027     }
2028     ierr = PetscFree(values);CHKERRQ(ierr);
2029     /* do not copy values to GPU since they are all zero and not yet needed there */
2030     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
2031     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2032     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2033     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
2034     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2035   }
2036   ierr = PetscFree(cols);CHKERRQ(ierr);
2037   PetscFunctionReturn(0);
2038 }
2039 
2040 /*
2041   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
2042   identify in the local ordering with periodic domain.
2043 */
2044 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[])
2045 {
2046   PetscErrorCode ierr;
2047   PetscInt       i,n;
2048 
2049   PetscFunctionBegin;
2050   ierr = ISLocalToGlobalMappingApplyBlock(ltog,1,row,row);CHKERRQ(ierr);
2051   ierr = ISLocalToGlobalMappingApplyBlock(ltog,*cnt,col,col);CHKERRQ(ierr);
2052   for (i=0,n=0; i<*cnt; i++) {
2053     if (col[i] >= *row) col[n++] = col[i];
2054   }
2055   *cnt = n;
2056   PetscFunctionReturn(0);
2057 }
2058 
2059 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J)
2060 {
2061   PetscErrorCode         ierr;
2062   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2063   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
2064   PetscInt               istart,iend,jstart,jend,ii,jj;
2065   MPI_Comm               comm;
2066   PetscScalar            *values;
2067   DMBoundaryType         bx,by;
2068   DMDAStencilType        st;
2069   ISLocalToGlobalMapping ltog;
2070 
2071   PetscFunctionBegin;
2072   /*
2073      nc - number of components per grid point
2074      col - number of colors needed in one direction for single component problem
2075   */
2076   ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
2077   col  = 2*s + 1;
2078 
2079   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
2080   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
2081   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
2082 
2083   ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr);
2084 
2085   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
2086 
2087   /* determine the matrix preallocation information */
2088   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
2089   for (i=xs; i<xs+nx; i++) {
2090     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2091     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2092     for (j=ys; j<ys+ny; j++) {
2093       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2094       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2095       slot   = i - gxs + gnx*(j - gys);
2096 
2097       /* Find block columns in block row */
2098       cnt = 0;
2099       for (ii=istart; ii<iend+1; ii++) {
2100         for (jj=jstart; jj<jend+1; jj++) {
2101           if (st == DMDA_STENCIL_BOX || !ii || !jj) {
2102             cols[cnt++] = slot + ii + gnx*jj;
2103           }
2104         }
2105       }
2106       ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2107       ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
2108     }
2109   }
2110   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
2111   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
2112   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2113 
2114   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
2115 
2116   /*
2117     For each node in the grid: we get the neighbors in the local (on processor ordering
2118     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2119     PETSc ordering.
2120   */
2121   if (!da->prealloc_only) {
2122     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
2123     for (i=xs; i<xs+nx; i++) {
2124       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2125       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2126       for (j=ys; j<ys+ny; j++) {
2127         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2128         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2129         slot   = i - gxs + gnx*(j - gys);
2130 
2131         /* Find block columns in block row */
2132         cnt = 0;
2133         for (ii=istart; ii<iend+1; ii++) {
2134           for (jj=jstart; jj<jend+1; jj++) {
2135             if (st == DMDA_STENCIL_BOX || !ii || !jj) {
2136               cols[cnt++] = slot + ii + gnx*jj;
2137             }
2138           }
2139         }
2140         ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2141         ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2142       }
2143     }
2144     ierr = PetscFree(values);CHKERRQ(ierr);
2145     /* do not copy values to GPU since they are all zero and not yet needed there */
2146     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
2147     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2148     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2149     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
2150     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2151   }
2152   ierr = PetscFree(cols);CHKERRQ(ierr);
2153   PetscFunctionReturn(0);
2154 }
2155 
2156 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J)
2157 {
2158   PetscErrorCode         ierr;
2159   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2160   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
2161   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
2162   MPI_Comm               comm;
2163   PetscScalar            *values;
2164   DMBoundaryType         bx,by,bz;
2165   DMDAStencilType        st;
2166   ISLocalToGlobalMapping ltog;
2167 
2168   PetscFunctionBegin;
2169   /*
2170      nc - number of components per grid point
2171      col - number of colors needed in one direction for single component problem
2172   */
2173   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
2174   col  = 2*s + 1;
2175 
2176   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
2177   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
2178   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
2179 
2180   /* create the matrix */
2181   ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr);
2182 
2183   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
2184 
2185   /* determine the matrix preallocation information */
2186   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
2187   for (i=xs; i<xs+nx; i++) {
2188     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2189     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2190     for (j=ys; j<ys+ny; j++) {
2191       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2192       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2193       for (k=zs; k<zs+nz; k++) {
2194         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2195         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2196 
2197         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2198 
2199         /* Find block columns in block row */
2200         cnt = 0;
2201         for (ii=istart; ii<iend+1; ii++) {
2202           for (jj=jstart; jj<jend+1; jj++) {
2203             for (kk=kstart; kk<kend+1; kk++) {
2204               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
2205                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
2206               }
2207             }
2208           }
2209         }
2210         ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2211         ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
2212       }
2213     }
2214   }
2215   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
2216   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
2217   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2218 
2219   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
2220 
2221   /*
2222     For each node in the grid: we get the neighbors in the local (on processor ordering
2223     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2224     PETSc ordering.
2225   */
2226   if (!da->prealloc_only) {
2227     ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr);
2228     for (i=xs; i<xs+nx; i++) {
2229       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2230       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2231       for (j=ys; j<ys+ny; j++) {
2232         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2233         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2234         for (k=zs; k<zs+nz; k++) {
2235           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2236           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2237 
2238           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2239 
2240           cnt = 0;
2241           for (ii=istart; ii<iend+1; ii++) {
2242             for (jj=jstart; jj<jend+1; jj++) {
2243               for (kk=kstart; kk<kend+1; kk++) {
2244                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
2245                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
2246                 }
2247               }
2248             }
2249           }
2250           ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2251           ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2252         }
2253       }
2254     }
2255     ierr = PetscFree(values);CHKERRQ(ierr);
2256     /* do not copy values to GPU since they are all zero and not yet needed there */
2257     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
2258     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2259     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2260     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
2261     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2262   }
2263   ierr = PetscFree(cols);CHKERRQ(ierr);
2264   PetscFunctionReturn(0);
2265 }
2266 
2267 /* ---------------------------------------------------------------------------------*/
2268 
2269 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J)
2270 {
2271   PetscErrorCode         ierr;
2272   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2273   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt, maxcnt = 0,l,p,*dnz,*onz;
2274   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
2275   DM_DA                  *dd = (DM_DA*)da->data;
2276   PetscInt               ifill_col,*dfill = dd->dfill,*ofill = dd->ofill;
2277   MPI_Comm               comm;
2278   PetscScalar            *values;
2279   DMBoundaryType         bx,by,bz;
2280   ISLocalToGlobalMapping ltog;
2281   DMDAStencilType        st;
2282   PetscBool              removedups = PETSC_FALSE;
2283 
2284   PetscFunctionBegin;
2285   /*
2286          nc - number of components per grid point
2287          col - number of colors needed in one direction for single component problem
2288 
2289   */
2290   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
2291   col  = 2*s + 1;
2292   if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
2293                  by 2*stencil_width + 1\n");
2294   if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
2295                  by 2*stencil_width + 1\n");
2296   if (bz == DM_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
2297                  by 2*stencil_width + 1\n");
2298 
2299   /*
2300        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
2301        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
2302   */
2303   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
2304   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
2305   if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;
2306 
2307   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
2308   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
2309   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
2310 
2311   ierr = PetscMalloc1(col*col*col*nc,&cols);CHKERRQ(ierr);
2312   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
2313 
2314   /* determine the matrix preallocation information */
2315   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
2316 
2317   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
2318   for (i=xs; i<xs+nx; i++) {
2319     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2320     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2321     for (j=ys; j<ys+ny; j++) {
2322       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2323       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2324       for (k=zs; k<zs+nz; k++) {
2325         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2326         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2327 
2328         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2329 
2330         for (l=0; l<nc; l++) {
2331           cnt = 0;
2332           for (ii=istart; ii<iend+1; ii++) {
2333             for (jj=jstart; jj<jend+1; jj++) {
2334               for (kk=kstart; kk<kend+1; kk++) {
2335                 if (ii || jj || kk) {
2336                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2337                     for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2338                   }
2339                 } else {
2340                   if (dfill) {
2341                     for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2342                   } else {
2343                     for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2344                   }
2345                 }
2346               }
2347             }
2348           }
2349           row  = l + nc*(slot);
2350           maxcnt = PetscMax(maxcnt,cnt);
2351           if (removedups) {
2352             ierr = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
2353           } else {
2354             ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
2355           }
2356         }
2357       }
2358     }
2359   }
2360   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
2361   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
2362   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2363   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
2364 
2365   /*
2366     For each node in the grid: we get the neighbors in the local (on processor ordering
2367     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2368     PETSc ordering.
2369   */
2370   if (!da->prealloc_only) {
2371     ierr = PetscCalloc1(maxcnt,&values);CHKERRQ(ierr);
2372     for (i=xs; i<xs+nx; i++) {
2373       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2374       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2375       for (j=ys; j<ys+ny; j++) {
2376         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2377         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2378         for (k=zs; k<zs+nz; k++) {
2379           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2380           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2381 
2382           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2383 
2384           for (l=0; l<nc; l++) {
2385             cnt = 0;
2386             for (ii=istart; ii<iend+1; ii++) {
2387               for (jj=jstart; jj<jend+1; jj++) {
2388                 for (kk=kstart; kk<kend+1; kk++) {
2389                   if (ii || jj || kk) {
2390                     if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2391                       for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2392                     }
2393                   } else {
2394                     if (dfill) {
2395                       for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2396                     } else {
2397                       for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2398                     }
2399                   }
2400                 }
2401               }
2402             }
2403             row  = l + nc*(slot);
2404             ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2405           }
2406         }
2407       }
2408     }
2409     ierr = PetscFree(values);CHKERRQ(ierr);
2410     /* do not copy values to GPU since they are all zero and not yet needed there */
2411     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
2412     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2413     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2414     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
2415     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2416   }
2417   ierr = PetscFree(cols);CHKERRQ(ierr);
2418   PetscFunctionReturn(0);
2419 }
2420