xref: /petsc/src/dm/impls/da/fdda.c (revision b294de211100dc04b4749667b026c87656b60f6e)
1 
2 #include <petsc/private/dmdaimpl.h> /*I      "petscdmda.h"     I*/
3 #include <petscmat.h>
4 
5 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring*);
6 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring*);
7 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring*);
8 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring*);
9 
10 /*
11    For ghost i that may be negative or greater than the upper bound this
12   maps it into the 0:m-1 range using periodicity
13 */
14 #define SetInRange(i,m) ((i < 0) ? m+i : ((i >= m) ? i-m : i))
15 
16 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill,PetscInt w,PetscInt **rfill)
17 {
18   PetscErrorCode ierr;
19   PetscInt       i,j,nz,*fill;
20 
21   PetscFunctionBegin;
22   if (!dfill) PetscFunctionReturn(0);
23 
24   /* count number nonzeros */
25   nz = 0;
26   for (i=0; i<w; i++) {
27     for (j=0; j<w; j++) {
28       if (dfill[w*i+j]) nz++;
29     }
30   }
31   ierr = PetscMalloc1(nz + w + 1,&fill);CHKERRQ(ierr);
32   /* construct modified CSR storage of nonzero structure */
33   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
34    so fill[1] - fill[0] gives number of nonzeros in first row etc */
35   nz = w + 1;
36   for (i=0; i<w; i++) {
37     fill[i] = nz;
38     for (j=0; j<w; j++) {
39       if (dfill[w*i+j]) {
40         fill[nz] = j;
41         nz++;
42       }
43     }
44   }
45   fill[w] = nz;
46 
47   *rfill = fill;
48   PetscFunctionReturn(0);
49 }
50 
51 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse,PetscInt w,PetscInt **rfill)
52 {
53   PetscErrorCode ierr;
54   PetscInt       nz;
55 
56   PetscFunctionBegin;
57   if (!dfillsparse) PetscFunctionReturn(0);
58 
59   /* Determine number of non-zeros */
60   nz = (dfillsparse[w] - w - 1);
61 
62   /* Allocate space for our copy of the given sparse matrix representation. */
63   ierr = PetscMalloc1(nz + w + 1,rfill);CHKERRQ(ierr);
64   ierr = PetscArraycpy(*rfill,dfillsparse,nz+w+1);CHKERRQ(ierr);
65   PetscFunctionReturn(0);
66 }
67 
68 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
69 {
70   PetscErrorCode ierr;
71   PetscInt       i,k,cnt = 1;
72 
73   PetscFunctionBegin;
74 
75   /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of
76    columns to the left with any nonzeros in them plus 1 */
77   ierr = PetscCalloc1(dd->w,&dd->ofillcols);CHKERRQ(ierr);
78   for (i=0; i<dd->w; i++) {
79     for (k=dd->ofill[i]; k<dd->ofill[i+1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
80   }
81   for (i=0; i<dd->w; i++) {
82     if (dd->ofillcols[i]) {
83       dd->ofillcols[i] = cnt++;
84     }
85   }
86   PetscFunctionReturn(0);
87 }
88 
89 /*@
90     DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
91     of the matrix returned by DMCreateMatrix().
92 
93     Logically Collective on da
94 
95     Input Parameters:
96 +   da - the distributed array
97 .   dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
98 -   ofill - the fill pattern in the off-diagonal blocks
99 
100     Level: developer
101 
102     Notes:
103     This only makes sense when you are doing multicomponent problems but using the
104        MPIAIJ matrix format
105 
106            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
107        representing coupling and 0 entries for missing coupling. For example
108 $             dfill[9] = {1, 0, 0,
109 $                         1, 1, 0,
110 $                         0, 1, 1}
111        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
112        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
113        diagonal block).
114 
115      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
116      can be represented in the dfill, ofill format
117 
118    Contributed by Glenn Hammond
119 
120 .seealso DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()
121 
122 @*/
123 PetscErrorCode  DMDASetBlockFills(DM da,const PetscInt *dfill,const PetscInt *ofill)
124 {
125   DM_DA          *dd = (DM_DA*)da->data;
126   PetscErrorCode ierr;
127 
128   PetscFunctionBegin;
129   /* save the given dfill and ofill information */
130   ierr = DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);CHKERRQ(ierr);
131   ierr = DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);CHKERRQ(ierr);
132 
133   /* count nonzeros in ofill columns */
134   ierr = DMDASetBlockFills_Private2(dd);CHKERRQ(ierr);
135 
136   PetscFunctionReturn(0);
137 }
138 
139 /*@
140     DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
141     of the matrix returned by DMCreateMatrix(), using sparse representations
142     of fill patterns.
143 
144     Logically Collective on da
145 
146     Input Parameters:
147 +   da - the distributed array
148 .   dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block)
149 -   ofill - the sparse fill pattern in the off-diagonal blocks
150 
151     Level: developer
152 
153     Notes: This only makes sense when you are doing multicomponent problems but using the
154        MPIAIJ matrix format
155 
156            The format for dfill and ofill is a sparse representation of a
157            dof-by-dof matrix with 1 entries representing coupling and 0 entries
158            for missing coupling.  The sparse representation is a 1 dimensional
159            array of length nz + dof + 1, where nz is the number of non-zeros in
160            the matrix.  The first dof entries in the array give the
161            starting array indices of each row's items in the rest of the array,
162            the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
163            and the remaining nz items give the column indices of each of
164            the 1s within the logical 2D matrix.  Each row's items within
165            the array are the column indices of the 1s within that row
166            of the 2D matrix.  PETSc developers may recognize that this is the
167            same format as that computed by the DMDASetBlockFills_Private()
168            function from a dense 2D matrix representation.
169 
170      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
171      can be represented in the dfill, ofill format
172 
173    Contributed by Philip C. Roth
174 
175 .seealso DMDASetBlockFills(), DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()
176 
177 @*/
178 PetscErrorCode  DMDASetBlockFillsSparse(DM da,const PetscInt *dfillsparse,const PetscInt *ofillsparse)
179 {
180   DM_DA          *dd = (DM_DA*)da->data;
181   PetscErrorCode ierr;
182 
183   PetscFunctionBegin;
184   /* save the given dfill and ofill information */
185   ierr = DMDASetBlockFillsSparse_Private(dfillsparse,dd->w,&dd->dfill);CHKERRQ(ierr);
186   ierr = DMDASetBlockFillsSparse_Private(ofillsparse,dd->w,&dd->ofill);CHKERRQ(ierr);
187 
188   /* count nonzeros in ofill columns */
189   ierr = DMDASetBlockFills_Private2(dd);CHKERRQ(ierr);
190 
191   PetscFunctionReturn(0);
192 }
193 
194 PetscErrorCode  DMCreateColoring_DA(DM da,ISColoringType ctype,ISColoring *coloring)
195 {
196   PetscErrorCode   ierr;
197   PetscInt         dim,m,n,p,nc;
198   DMBoundaryType   bx,by,bz;
199   MPI_Comm         comm;
200   PetscMPIInt      size;
201   PetscBool        isBAIJ;
202   DM_DA            *dd = (DM_DA*)da->data;
203 
204   PetscFunctionBegin;
205   /*
206                                   m
207           ------------------------------------------------------
208          |                                                     |
209          |                                                     |
210          |               ----------------------                |
211          |               |                    |                |
212       n  |           yn  |                    |                |
213          |               |                    |                |
214          |               .---------------------                |
215          |             (xs,ys)     xn                          |
216          |            .                                        |
217          |         (gxs,gys)                                   |
218          |                                                     |
219           -----------------------------------------------------
220   */
221 
222   /*
223          nc - number of components per grid point
224          col - number of colors needed in one direction for single component problem
225 
226   */
227   ierr = DMDAGetInfo(da,&dim,NULL,NULL,NULL,&m,&n,&p,&nc,NULL,&bx,&by,&bz,NULL);CHKERRQ(ierr);
228 
229   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
230   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
231   if (ctype == IS_COLORING_LOCAL) {
232     if (size == 1) {
233       ctype = IS_COLORING_GLOBAL;
234     } else if (dim > 1) {
235       if ((m==1 && bx == DM_BOUNDARY_PERIODIC) || (n==1 && by == DM_BOUNDARY_PERIODIC) || (p==1 && bz == DM_BOUNDARY_PERIODIC)) {
236         SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain  on the same process");
237       }
238     }
239   }
240 
241   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
242      matrices is for the blocks, not the individual matrix elements  */
243   ierr = PetscStrbeginswith(da->mattype,MATBAIJ,&isBAIJ);CHKERRQ(ierr);
244   if (!isBAIJ) {ierr = PetscStrbeginswith(da->mattype,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);}
245   if (!isBAIJ) {ierr = PetscStrbeginswith(da->mattype,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);}
246   if (isBAIJ) {
247     dd->w  = 1;
248     dd->xs = dd->xs/nc;
249     dd->xe = dd->xe/nc;
250     dd->Xs = dd->Xs/nc;
251     dd->Xe = dd->Xe/nc;
252   }
253 
254   /*
255      We do not provide a getcoloring function in the DMDA operations because
256    the basic DMDA does not know about matrices. We think of DMDA as being
257    more low-level then matrices.
258   */
259   if (dim == 1) {
260     ierr = DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
261   } else if (dim == 2) {
262     ierr = DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
263   } else if (dim == 3) {
264     ierr = DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
265   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
266   if (isBAIJ) {
267     dd->w  = nc;
268     dd->xs = dd->xs*nc;
269     dd->xe = dd->xe*nc;
270     dd->Xs = dd->Xs*nc;
271     dd->Xe = dd->Xe*nc;
272   }
273   PetscFunctionReturn(0);
274 }
275 
276 /* ---------------------------------------------------------------------------------*/
277 
278 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
279 {
280   PetscErrorCode  ierr;
281   PetscInt        xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
282   PetscInt        ncolors;
283   MPI_Comm        comm;
284   DMBoundaryType  bx,by;
285   DMDAStencilType st;
286   ISColoringValue *colors;
287   DM_DA           *dd = (DM_DA*)da->data;
288 
289   PetscFunctionBegin;
290   /*
291          nc - number of components per grid point
292          col - number of colors needed in one direction for single component problem
293 
294   */
295   ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,&M,&N,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
296   col  = 2*s + 1;
297   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
298   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
299   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
300 
301   /* special case as taught to us by Paul Hovland */
302   if (st == DMDA_STENCIL_STAR && s == 1) {
303     ierr = DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
304   } else {
305     if (ctype == IS_COLORING_GLOBAL) {
306       if (!dd->localcoloring) {
307         ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr);
308         ii   = 0;
309         for (j=ys; j<ys+ny; j++) {
310           for (i=xs; i<xs+nx; i++) {
311             for (k=0; k<nc; k++) {
312               colors[ii++] = k + nc*((i % col) + col*(j % col));
313             }
314           }
315         }
316         ncolors = nc + nc*(col-1 + col*(col-1));
317         ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
318       }
319       *coloring = dd->localcoloring;
320     } else if (ctype == IS_COLORING_LOCAL) {
321       if (!dd->ghostedcoloring) {
322         ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr);
323         ii   = 0;
324         for (j=gys; j<gys+gny; j++) {
325           for (i=gxs; i<gxs+gnx; i++) {
326             for (k=0; k<nc; k++) {
327               /* the complicated stuff is to handle periodic boundaries */
328               colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
329             }
330           }
331         }
332         ncolors = nc + nc*(col - 1 + col*(col-1));
333         ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
334         /* PetscIntView(ncolors,(PetscInt*)colors,0); */
335 
336         ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
337       }
338       *coloring = dd->ghostedcoloring;
339     } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
340   }
341   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
342   PetscFunctionReturn(0);
343 }
344 
345 /* ---------------------------------------------------------------------------------*/
346 
347 PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
348 {
349   PetscErrorCode  ierr;
350   PetscInt        xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
351   PetscInt        ncolors;
352   MPI_Comm        comm;
353   DMBoundaryType  bx,by,bz;
354   DMDAStencilType st;
355   ISColoringValue *colors;
356   DM_DA           *dd = (DM_DA*)da->data;
357 
358   PetscFunctionBegin;
359   /*
360          nc - number of components per grid point
361          col - number of colors needed in one direction for single component problem
362 
363   */
364   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
365   col  = 2*s + 1;
366   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
367   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
368   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
369 
370   /* create the coloring */
371   if (ctype == IS_COLORING_GLOBAL) {
372     if (!dd->localcoloring) {
373       ierr = PetscMalloc1(nc*nx*ny*nz,&colors);CHKERRQ(ierr);
374       ii   = 0;
375       for (k=zs; k<zs+nz; k++) {
376         for (j=ys; j<ys+ny; j++) {
377           for (i=xs; i<xs+nx; i++) {
378             for (l=0; l<nc; l++) {
379               colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
380             }
381           }
382         }
383       }
384       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
385       ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
386     }
387     *coloring = dd->localcoloring;
388   } else if (ctype == IS_COLORING_LOCAL) {
389     if (!dd->ghostedcoloring) {
390       ierr = PetscMalloc1(nc*gnx*gny*gnz,&colors);CHKERRQ(ierr);
391       ii   = 0;
392       for (k=gzs; k<gzs+gnz; k++) {
393         for (j=gys; j<gys+gny; j++) {
394           for (i=gxs; i<gxs+gnx; i++) {
395             for (l=0; l<nc; l++) {
396               /* the complicated stuff is to handle periodic boundaries */
397               colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
398             }
399           }
400         }
401       }
402       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
403       ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
404       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
405     }
406     *coloring = dd->ghostedcoloring;
407   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
408   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
409   PetscFunctionReturn(0);
410 }
411 
412 /* ---------------------------------------------------------------------------------*/
413 
414 PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
415 {
416   PetscErrorCode  ierr;
417   PetscInt        xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
418   PetscInt        ncolors;
419   MPI_Comm        comm;
420   DMBoundaryType  bx;
421   ISColoringValue *colors;
422   DM_DA           *dd = (DM_DA*)da->data;
423 
424   PetscFunctionBegin;
425   /*
426          nc - number of components per grid point
427          col - number of colors needed in one direction for single component problem
428 
429   */
430   ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,&M,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr);
431   col  = 2*s + 1;
432   ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr);
433   ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr);
434   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
435 
436   /* create the coloring */
437   if (ctype == IS_COLORING_GLOBAL) {
438     if (!dd->localcoloring) {
439       ierr = PetscMalloc1(nc*nx,&colors);CHKERRQ(ierr);
440       if (dd->ofillcols) {
441         PetscInt tc = 0;
442         for (i=0; i<nc; i++) tc += (PetscInt) (dd->ofillcols[i] > 0);
443         i1 = 0;
444         for (i=xs; i<xs+nx; i++) {
445           for (l=0; l<nc; l++) {
446             if (dd->ofillcols[l] && (i % col)) {
447               colors[i1++] =  nc - 1 + tc*((i % col) - 1) + dd->ofillcols[l];
448             } else {
449               colors[i1++] = l;
450             }
451           }
452         }
453         ncolors = nc + 2*s*tc;
454       } else {
455         i1 = 0;
456         for (i=xs; i<xs+nx; i++) {
457           for (l=0; l<nc; l++) {
458             colors[i1++] = l + nc*(i % col);
459           }
460         }
461         ncolors = nc + nc*(col-1);
462       }
463       ierr = ISColoringCreate(comm,ncolors,nc*nx,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
464     }
465     *coloring = dd->localcoloring;
466   } else if (ctype == IS_COLORING_LOCAL) {
467     if (!dd->ghostedcoloring) {
468       ierr = PetscMalloc1(nc*gnx,&colors);CHKERRQ(ierr);
469       i1   = 0;
470       for (i=gxs; i<gxs+gnx; i++) {
471         for (l=0; l<nc; l++) {
472           /* the complicated stuff is to handle periodic boundaries */
473           colors[i1++] = l + nc*(SetInRange(i,m) % col);
474         }
475       }
476       ncolors = nc + nc*(col-1);
477       ierr    = ISColoringCreate(comm,ncolors,nc*gnx,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
478       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
479     }
480     *coloring = dd->ghostedcoloring;
481   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
482   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
483   PetscFunctionReturn(0);
484 }
485 
486 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
487 {
488   PetscErrorCode  ierr;
489   PetscInt        xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
490   PetscInt        ncolors;
491   MPI_Comm        comm;
492   DMBoundaryType  bx,by;
493   ISColoringValue *colors;
494   DM_DA           *dd = (DM_DA*)da->data;
495 
496   PetscFunctionBegin;
497   /*
498          nc - number of components per grid point
499          col - number of colors needed in one direction for single component problem
500 
501   */
502   ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,NULL);CHKERRQ(ierr);
503   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
504   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
505   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
506   /* create the coloring */
507   if (ctype == IS_COLORING_GLOBAL) {
508     if (!dd->localcoloring) {
509       ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr);
510       ii   = 0;
511       for (j=ys; j<ys+ny; j++) {
512         for (i=xs; i<xs+nx; i++) {
513           for (k=0; k<nc; k++) {
514             colors[ii++] = k + nc*((3*j+i) % 5);
515           }
516         }
517       }
518       ncolors = 5*nc;
519       ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
520     }
521     *coloring = dd->localcoloring;
522   } else if (ctype == IS_COLORING_LOCAL) {
523     if (!dd->ghostedcoloring) {
524       ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr);
525       ii = 0;
526       for (j=gys; j<gys+gny; j++) {
527         for (i=gxs; i<gxs+gnx; i++) {
528           for (k=0; k<nc; k++) {
529             colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
530           }
531         }
532       }
533       ncolors = 5*nc;
534       ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
535       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
536     }
537     *coloring = dd->ghostedcoloring;
538   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
539   PetscFunctionReturn(0);
540 }
541 
542 /* =========================================================================== */
543 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat,PetscBool);
544 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM,Mat);
545 extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM,Mat,PetscBool);
546 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat,PetscBool);
547 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat);
548 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat,PetscBool);
549 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat);
550 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat);
551 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat);
552 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat);
553 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat);
554 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM,Mat);
555 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM,Mat);
556 extern PetscErrorCode DMCreateMatrix_DA_IS(DM,Mat);
557 
558 /*@C
559    MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix
560 
561    Logically Collective on mat
562 
563    Input Parameters:
564 +  mat - the matrix
565 -  da - the da
566 
567    Level: intermediate
568 
569 @*/
570 PetscErrorCode MatSetupDM(Mat mat,DM da)
571 {
572   PetscErrorCode ierr;
573 
574   PetscFunctionBegin;
575   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
576   PetscValidHeaderSpecificType(da,DM_CLASSID,2,DMDA);
577   ierr = PetscTryMethod(mat,"MatSetupDM_C",(Mat,DM),(mat,da));CHKERRQ(ierr);
578   PetscFunctionReturn(0);
579 }
580 
581 PetscErrorCode  MatView_MPI_DA(Mat A,PetscViewer viewer)
582 {
583   DM                da;
584   PetscErrorCode    ierr;
585   const char        *prefix;
586   Mat               Anatural;
587   AO                ao;
588   PetscInt          rstart,rend,*petsc,i;
589   IS                is;
590   MPI_Comm          comm;
591   PetscViewerFormat format;
592 
593   PetscFunctionBegin;
594   /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
595   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
596   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0);
597 
598   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
599   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
600   if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
601 
602   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
603   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
604   ierr = PetscMalloc1(rend-rstart,&petsc);CHKERRQ(ierr);
605   for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
606   ierr = AOApplicationToPetsc(ao,rend-rstart,petsc);CHKERRQ(ierr);
607   ierr = ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
608 
609   /* call viewer on natural ordering */
610   ierr = MatCreateSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);CHKERRQ(ierr);
611   ierr = ISDestroy(&is);CHKERRQ(ierr);
612   ierr = PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);CHKERRQ(ierr);
613   ierr = PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);CHKERRQ(ierr);
614   ierr = PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);CHKERRQ(ierr);
615   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
616   ierr = MatView(Anatural,viewer);CHKERRQ(ierr);
617   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
618   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
619   PetscFunctionReturn(0);
620 }
621 
622 PetscErrorCode  MatLoad_MPI_DA(Mat A,PetscViewer viewer)
623 {
624   DM             da;
625   PetscErrorCode ierr;
626   Mat            Anatural,Aapp;
627   AO             ao;
628   PetscInt       rstart,rend,*app,i,m,n,M,N;
629   IS             is;
630   MPI_Comm       comm;
631 
632   PetscFunctionBegin;
633   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
634   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
635   if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
636 
637   /* Load the matrix in natural ordering */
638   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Anatural);CHKERRQ(ierr);
639   ierr = MatSetType(Anatural,((PetscObject)A)->type_name);CHKERRQ(ierr);
640   ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr);
641   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
642   ierr = MatSetSizes(Anatural,m,n,M,N);CHKERRQ(ierr);
643   ierr = MatLoad(Anatural,viewer);CHKERRQ(ierr);
644 
645   /* Map natural ordering to application ordering and create IS */
646   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
647   ierr = MatGetOwnershipRange(Anatural,&rstart,&rend);CHKERRQ(ierr);
648   ierr = PetscMalloc1(rend-rstart,&app);CHKERRQ(ierr);
649   for (i=rstart; i<rend; i++) app[i-rstart] = i;
650   ierr = AOPetscToApplication(ao,rend-rstart,app);CHKERRQ(ierr);
651   ierr = ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
652 
653   /* Do permutation and replace header */
654   ierr = MatCreateSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);CHKERRQ(ierr);
655   ierr = MatHeaderReplace(A,&Aapp);CHKERRQ(ierr);
656   ierr = ISDestroy(&is);CHKERRQ(ierr);
657   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
658   PetscFunctionReturn(0);
659 }
660 
661 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
662 {
663   PetscErrorCode ierr;
664   PetscInt       dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
665   Mat            A;
666   MPI_Comm       comm;
667   MatType        Atype;
668   void           (*aij)(void)=NULL,(*baij)(void)=NULL,(*sbaij)(void)=NULL,(*sell)(void)=NULL,(*is)(void)=NULL;
669   MatType        mtype;
670   PetscMPIInt    size;
671   DM_DA          *dd = (DM_DA*)da->data;
672 
673   PetscFunctionBegin;
674   ierr = MatInitializePackage();CHKERRQ(ierr);
675   mtype = da->mattype;
676 
677   /*
678                                   m
679           ------------------------------------------------------
680          |                                                     |
681          |                                                     |
682          |               ----------------------                |
683          |               |                    |                |
684       n  |           ny  |                    |                |
685          |               |                    |                |
686          |               .---------------------                |
687          |             (xs,ys)     nx                          |
688          |            .                                        |
689          |         (gxs,gys)                                   |
690          |                                                     |
691           -----------------------------------------------------
692   */
693 
694   /*
695          nc - number of components per grid point
696          col - number of colors needed in one direction for single component problem
697 
698   */
699   M   = dd->M;
700   N   = dd->N;
701   P   = dd->P;
702   dim = da->dim;
703   dof = dd->w;
704   /* ierr = DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr); */
705   ierr = DMDAGetCorners(da,NULL,NULL,NULL,&nx,&ny,&nz);CHKERRQ(ierr);
706   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
707   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
708   ierr = MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);CHKERRQ(ierr);
709   ierr = MatSetType(A,mtype);CHKERRQ(ierr);
710   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
711   ierr = MatSetDM(A,da);CHKERRQ(ierr);
712   if (da->structure_only) {
713     ierr = MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE);CHKERRQ(ierr);
714   }
715   ierr = MatGetType(A,&Atype);CHKERRQ(ierr);
716   /*
717      We do not provide a getmatrix function in the DMDA operations because
718    the basic DMDA does not know about matrices. We think of DMDA as being more
719    more low-level than matrices. This is kind of cheating but, cause sometimes
720    we think of DMDA has higher level than matrices.
721 
722      We could switch based on Atype (or mtype), but we do not since the
723    specialized setting routines depend only on the particular preallocation
724    details of the matrix, not the type itself.
725   */
726   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
727   if (!aij) {
728     ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
729   }
730   if (!aij) {
731     ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
732     if (!baij) {
733       ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
734     }
735     if (!baij) {
736       ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
737       if (!sbaij) {
738         ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
739       }
740       if (!sbaij) {
741         ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISELLSetPreallocation_C",&sell);CHKERRQ(ierr);
742         if (!sell) {
743           ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",&sell);CHKERRQ(ierr);
744         }
745       }
746       if (!sell) {
747         ierr = PetscObjectQueryFunction((PetscObject)A,"MatISSetPreallocation_C",&is);CHKERRQ(ierr);
748       }
749     }
750   }
751   if (aij) {
752     if (dim == 1) {
753       if (dd->ofill) {
754         ierr = DMCreateMatrix_DA_1d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
755       } else {
756         DMBoundaryType bx;
757         PetscMPIInt  size;
758         ierr = DMDAGetInfo(da,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&bx,NULL,NULL,NULL);CHKERRQ(ierr);
759         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRMPI(ierr);
760         if (size == 1 && bx == DM_BOUNDARY_NONE) {
761           ierr = DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da,A,PETSC_FALSE);CHKERRQ(ierr);
762         } else {
763           ierr = DMCreateMatrix_DA_1d_MPIAIJ(da,A,PETSC_FALSE);CHKERRQ(ierr);
764         }
765       }
766     } else if (dim == 2) {
767       if (dd->ofill) {
768         ierr = DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
769       } else {
770         ierr = DMCreateMatrix_DA_2d_MPIAIJ(da,A,PETSC_FALSE);CHKERRQ(ierr);
771       }
772     } else if (dim == 3) {
773       if (dd->ofill) {
774         ierr = DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
775       } else {
776         ierr = DMCreateMatrix_DA_3d_MPIAIJ(da,A,PETSC_FALSE);CHKERRQ(ierr);
777       }
778     }
779   } else if (baij) {
780     if (dim == 2) {
781       ierr = DMCreateMatrix_DA_2d_MPIBAIJ(da,A);CHKERRQ(ierr);
782     } else if (dim == 3) {
783       ierr = DMCreateMatrix_DA_3d_MPIBAIJ(da,A);CHKERRQ(ierr);
784     } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
785   } else if (sbaij) {
786     if (dim == 2) {
787       ierr = DMCreateMatrix_DA_2d_MPISBAIJ(da,A);CHKERRQ(ierr);
788     } else if (dim == 3) {
789       ierr = DMCreateMatrix_DA_3d_MPISBAIJ(da,A);CHKERRQ(ierr);
790     } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
791   } else if (sell) {
792      if (dim == 2) {
793        ierr = DMCreateMatrix_DA_2d_MPISELL(da,A);CHKERRQ(ierr);
794      } else if (dim == 3) {
795        ierr = DMCreateMatrix_DA_3d_MPISELL(da,A);CHKERRQ(ierr);
796      } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
797   } else if (is) {
798     ierr = DMCreateMatrix_DA_IS(da,A);CHKERRQ(ierr);
799   } else {
800     ISLocalToGlobalMapping ltog;
801 
802     ierr = MatSetBlockSize(A,dof);CHKERRQ(ierr);
803     ierr = MatSetUp(A);CHKERRQ(ierr);
804     ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
805     ierr = MatSetLocalToGlobalMapping(A,ltog,ltog);CHKERRQ(ierr);
806   }
807   ierr = DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);CHKERRQ(ierr);
808   ierr = MatSetStencil(A,dim,dims,starts,dof);CHKERRQ(ierr);
809   ierr = MatSetDM(A,da);CHKERRQ(ierr);
810   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
811   if (size > 1) {
812     /* change viewer to display matrix in natural ordering */
813     ierr = MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA);CHKERRQ(ierr);
814     ierr = MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA);CHKERRQ(ierr);
815   }
816   *J = A;
817   PetscFunctionReturn(0);
818 }
819 
820 /* ---------------------------------------------------------------------------------*/
821 PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
822 
823 PetscErrorCode DMCreateMatrix_DA_IS(DM dm,Mat J)
824 {
825   DM_DA                  *da = (DM_DA*)dm->data;
826   Mat                    lJ;
827   ISLocalToGlobalMapping ltog;
828   IS                     is_loc_filt, is_glob;
829   const PetscInt         *e_loc,*idx;
830   PetscInt               nel,nen,nv,dof,dim,*gidx,nb;
831   PetscBool              flg;
832   PetscErrorCode         ierr;
833 
834   /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
835      We need to filter the local indices that are represented through the DMDAGetElements decomposition
836      This is because the size of the local matrices in MATIS is the local size of the l2g map */
837   PetscFunctionBegin;
838   dof  = da->w;
839   dim  = dm->dim;
840 
841   ierr = MatSetBlockSize(J,dof);CHKERRQ(ierr);
842 
843   /* get local elements indices in local DMDA numbering */
844   ierr = DMDAGetElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
845   ierr = ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nel*nen,e_loc,PETSC_COPY_VALUES,&is_loc_filt);CHKERRQ(ierr);
846   ierr = DMDARestoreElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr);
847 
848   /* obtain a consistent local ordering for MATIS */
849   ierr = ISSortRemoveDups(is_loc_filt);CHKERRQ(ierr);
850   ierr = ISBlockGetLocalSize(is_loc_filt,&nb);CHKERRQ(ierr);
851   ierr = DMGetLocalToGlobalMapping(dm,&ltog);CHKERRQ(ierr);
852   ierr = ISLocalToGlobalMappingGetSize(ltog,&nv);CHKERRQ(ierr);
853   ierr = PetscMalloc1(PetscMax(nb,nv/dof),&gidx);CHKERRQ(ierr);
854   ierr = ISBlockGetIndices(is_loc_filt,&idx);CHKERRQ(ierr);
855   ierr = ISLocalToGlobalMappingApplyBlock(ltog,nb,idx,gidx);CHKERRQ(ierr);
856   ierr = ISBlockRestoreIndices(is_loc_filt,&idx);CHKERRQ(ierr);
857   ierr = ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nb,gidx,PETSC_USE_POINTER,&is_glob);CHKERRQ(ierr);
858   ierr = ISLocalToGlobalMappingCreateIS(is_glob,&ltog);CHKERRQ(ierr);
859   ierr = ISDestroy(&is_glob);CHKERRQ(ierr);
860   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
861   ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
862 
863   /* We also attach a l2g map to the local matrices to have MatSetValueLocal to work */
864   ierr = MatISGetLocalMat(J,&lJ);CHKERRQ(ierr);
865   ierr = ISLocalToGlobalMappingCreateIS(is_loc_filt,&ltog);CHKERRQ(ierr);
866   ierr = ISDestroy(&is_loc_filt);CHKERRQ(ierr);
867   ierr = ISCreateStride(PetscObjectComm((PetscObject)lJ),nv/dof,0,1,&is_glob);CHKERRQ(ierr);
868   ierr = ISGetIndices(is_glob,&idx);CHKERRQ(ierr);
869   ierr = ISGlobalToLocalMappingApplyBlock(ltog,IS_GTOLM_MASK,nv/dof,idx,&nb,gidx);CHKERRQ(ierr);
870   ierr = ISRestoreIndices(is_glob,&idx);CHKERRQ(ierr);
871   ierr = ISDestroy(&is_glob);CHKERRQ(ierr);
872   ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
873   ierr = ISCreateBlock(PETSC_COMM_SELF,dof,nb,gidx,PETSC_USE_POINTER,&is_loc_filt);CHKERRQ(ierr);
874   ierr = ISLocalToGlobalMappingCreateIS(is_loc_filt,&ltog);CHKERRQ(ierr);
875   ierr = ISDestroy(&is_loc_filt);CHKERRQ(ierr);
876   ierr = MatSetLocalToGlobalMapping(lJ,ltog,ltog);CHKERRQ(ierr);
877   ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
878   ierr = PetscFree(gidx);CHKERRQ(ierr);
879 
880   /* Preallocation (not exact): we reuse the preallocation routines of the assembled version  */
881   flg = dm->prealloc_only;
882   dm->prealloc_only = PETSC_TRUE;
883   switch (dim) {
884   case 1:
885     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr);
886     ierr = DMCreateMatrix_DA_1d_MPIAIJ(dm,J,PETSC_TRUE);CHKERRQ(ierr);
887     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
888     break;
889   case 2:
890     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr);
891     ierr = DMCreateMatrix_DA_2d_MPIAIJ(dm,J,PETSC_TRUE);CHKERRQ(ierr);
892     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
893     break;
894   case 3:
895     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr);
896     ierr = DMCreateMatrix_DA_3d_MPIAIJ(dm,J,PETSC_TRUE);CHKERRQ(ierr);
897     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
898     break;
899   default:
900     SETERRQ1(PetscObjectComm((PetscObject)dm),PETSC_ERR_SUP,"Unhandled dimension %d",dim);
901   }
902   dm->prealloc_only = flg;
903   PetscFunctionReturn(0);
904 }
905 
906 PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da,Mat J)
907 {
908   PetscErrorCode         ierr;
909   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p;
910   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
911   MPI_Comm               comm;
912   PetscScalar            *values;
913   DMBoundaryType         bx,by;
914   ISLocalToGlobalMapping ltog;
915   DMDAStencilType        st;
916 
917   PetscFunctionBegin;
918   /*
919          nc - number of components per grid point
920          col - number of colors needed in one direction for single component problem
921 
922   */
923   ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
924   col  = 2*s + 1;
925   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
926   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
927   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
928 
929   ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr);
930   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
931 
932   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
933   /* determine the matrix preallocation information */
934   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
935   for (i=xs; i<xs+nx; i++) {
936 
937     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
938     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
939 
940     for (j=ys; j<ys+ny; j++) {
941       slot = i - gxs + gnx*(j - gys);
942 
943       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
944       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
945 
946       cnt = 0;
947       for (k=0; k<nc; k++) {
948         for (l=lstart; l<lend+1; l++) {
949           for (p=pstart; p<pend+1; p++) {
950             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
951               cols[cnt++] = k + nc*(slot + gnx*l + p);
952             }
953           }
954         }
955         rows[k] = k + nc*(slot);
956       }
957       ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
958     }
959   }
960   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
961   ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr);
962   ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
963   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
964 
965   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
966 
967   /*
968     For each node in the grid: we get the neighbors in the local (on processor ordering
969     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
970     PETSc ordering.
971   */
972   if (!da->prealloc_only) {
973     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
974     for (i=xs; i<xs+nx; i++) {
975 
976       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
977       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
978 
979       for (j=ys; j<ys+ny; j++) {
980         slot = i - gxs + gnx*(j - gys);
981 
982         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
983         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
984 
985         cnt = 0;
986         for (k=0; k<nc; k++) {
987           for (l=lstart; l<lend+1; l++) {
988             for (p=pstart; p<pend+1; p++) {
989               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
990                 cols[cnt++] = k + nc*(slot + gnx*l + p);
991               }
992             }
993           }
994           rows[k] = k + nc*(slot);
995         }
996         ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
997       }
998     }
999     ierr = PetscFree(values);CHKERRQ(ierr);
1000     /* do not copy values to GPU since they are all zero and not yet needed there */
1001     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1002     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1003     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1004     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1005     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1006   }
1007   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1008   PetscFunctionReturn(0);
1009 }
1010 
1011 PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da,Mat J)
1012 {
1013   PetscErrorCode         ierr;
1014   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1015   PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
1016   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
1017   MPI_Comm               comm;
1018   PetscScalar            *values;
1019   DMBoundaryType         bx,by,bz;
1020   ISLocalToGlobalMapping ltog;
1021   DMDAStencilType        st;
1022 
1023   PetscFunctionBegin;
1024   /*
1025          nc - number of components per grid point
1026          col - number of colors needed in one direction for single component problem
1027 
1028   */
1029   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1030   col  = 2*s + 1;
1031   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1032   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1033   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1034 
1035   ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr);
1036   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1037 
1038   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1039   /* determine the matrix preallocation information */
1040   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1041   for (i=xs; i<xs+nx; i++) {
1042     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1043     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1044     for (j=ys; j<ys+ny; j++) {
1045       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1046       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1047       for (k=zs; k<zs+nz; k++) {
1048         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1049         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1050 
1051         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1052 
1053         cnt = 0;
1054         for (l=0; l<nc; l++) {
1055           for (ii=istart; ii<iend+1; ii++) {
1056             for (jj=jstart; jj<jend+1; jj++) {
1057               for (kk=kstart; kk<kend+1; kk++) {
1058                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1059                   cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1060                 }
1061               }
1062             }
1063           }
1064           rows[l] = l + nc*(slot);
1065         }
1066         ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1067       }
1068     }
1069   }
1070   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1071   ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1072   ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1073   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1074   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1075 
1076   /*
1077     For each node in the grid: we get the neighbors in the local (on processor ordering
1078     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1079     PETSc ordering.
1080   */
1081   if (!da->prealloc_only) {
1082     ierr = PetscCalloc1(col*col*col*nc*nc*nc,&values);CHKERRQ(ierr);
1083     for (i=xs; i<xs+nx; i++) {
1084       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1085       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1086       for (j=ys; j<ys+ny; j++) {
1087         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1088         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1089         for (k=zs; k<zs+nz; k++) {
1090           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1091           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1092 
1093           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1094 
1095           cnt = 0;
1096           for (l=0; l<nc; l++) {
1097             for (ii=istart; ii<iend+1; ii++) {
1098               for (jj=jstart; jj<jend+1; jj++) {
1099                 for (kk=kstart; kk<kend+1; kk++) {
1100                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1101                     cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1102                   }
1103                 }
1104               }
1105             }
1106             rows[l] = l + nc*(slot);
1107           }
1108           ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1109         }
1110       }
1111     }
1112     ierr = PetscFree(values);CHKERRQ(ierr);
1113     /* do not copy values to GPU since they are all zero and not yet needed there */
1114     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1115     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1116     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1117     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1118     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1119   }
1120   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1121   PetscFunctionReturn(0);
1122 }
1123 
1124 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J,PetscBool isIS)
1125 {
1126   PetscErrorCode         ierr;
1127   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,M,N;
1128   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
1129   MPI_Comm               comm;
1130   DMBoundaryType         bx,by;
1131   ISLocalToGlobalMapping ltog,mltog;
1132   DMDAStencilType        st;
1133   PetscBool              removedups = PETSC_FALSE,alreadyboundtocpu = PETSC_TRUE;
1134 
1135   PetscFunctionBegin;
1136   /*
1137          nc - number of components per grid point
1138          col - number of colors needed in one direction for single component problem
1139 
1140   */
1141   ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
1142   if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) {
1143     ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr);
1144   }
1145   col  = 2*s + 1;
1146   /*
1147        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1148        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1149   */
1150   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1151   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1152   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
1153   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
1154   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1155 
1156   ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr);
1157   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1158 
1159   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1160   /* determine the matrix preallocation information */
1161   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
1162   for (i=xs; i<xs+nx; i++) {
1163 
1164     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1165     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1166 
1167     for (j=ys; j<ys+ny; j++) {
1168       slot = i - gxs + gnx*(j - gys);
1169 
1170       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1171       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1172 
1173       cnt = 0;
1174       for (k=0; k<nc; k++) {
1175         for (l=lstart; l<lend+1; l++) {
1176           for (p=pstart; p<pend+1; p++) {
1177             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1178               cols[cnt++] = k + nc*(slot + gnx*l + p);
1179             }
1180           }
1181         }
1182         rows[k] = k + nc*(slot);
1183       }
1184       if (removedups) {
1185         ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1186       } else {
1187         ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1188       }
1189     }
1190   }
1191   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1192   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1193   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1194   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1195   ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
1196   if (!mltog) {
1197     ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1198   }
1199 
1200   /*
1201     For each node in the grid: we get the neighbors in the local (on processor ordering
1202     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1203     PETSc ordering.
1204   */
1205   if (!da->prealloc_only) {
1206     for (i=xs; i<xs+nx; i++) {
1207 
1208       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1209       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1210 
1211       for (j=ys; j<ys+ny; j++) {
1212         slot = i - gxs + gnx*(j - gys);
1213 
1214         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1215         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1216 
1217         cnt = 0;
1218         for (l=lstart; l<lend+1; l++) {
1219           for (p=pstart; p<pend+1; p++) {
1220             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1221               cols[cnt++] = nc*(slot + gnx*l + p);
1222               for (k=1; k<nc; k++) {
1223                 cols[cnt] = 1 + cols[cnt-1];cnt++;
1224               }
1225             }
1226           }
1227         }
1228         for (k=0; k<nc; k++) rows[k] = k + nc*(slot);
1229         ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1230       }
1231     }
1232     /* do not copy values to GPU since they are all zero and not yet needed there */
1233     ierr = MatBoundToCPU(J,&alreadyboundtocpu);CHKERRQ(ierr);
1234     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1235     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1236     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1237     if (!alreadyboundtocpu) {ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);}
1238     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1239     if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) {
1240       ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
1241     }
1242   }
1243   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1244   PetscFunctionReturn(0);
1245 }
1246 
1247 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)
1248 {
1249   PetscErrorCode         ierr;
1250   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1251   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,maxcnt = 0,l,p,M,N;
1252   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
1253   DM_DA                  *dd = (DM_DA*)da->data;
1254   PetscInt               ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
1255   MPI_Comm               comm;
1256   DMBoundaryType         bx,by;
1257   ISLocalToGlobalMapping ltog;
1258   DMDAStencilType        st;
1259   PetscBool              removedups = PETSC_FALSE;
1260 
1261   PetscFunctionBegin;
1262   /*
1263          nc - number of components per grid point
1264          col - number of colors needed in one direction for single component problem
1265 
1266   */
1267   ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
1268   col  = 2*s + 1;
1269   /*
1270        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1271        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1272   */
1273   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1274   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1275   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
1276   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
1277   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1278 
1279   ierr = PetscMalloc1(col*col*nc,&cols);CHKERRQ(ierr);
1280   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1281 
1282   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1283   /* determine the matrix preallocation information */
1284   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
1285   for (i=xs; i<xs+nx; i++) {
1286 
1287     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1288     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1289 
1290     for (j=ys; j<ys+ny; j++) {
1291       slot = i - gxs + gnx*(j - gys);
1292 
1293       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1294       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1295 
1296       for (k=0; k<nc; k++) {
1297         cnt = 0;
1298         for (l=lstart; l<lend+1; l++) {
1299           for (p=pstart; p<pend+1; p++) {
1300             if (l || p) {
1301               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
1302                 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
1303               }
1304             } else {
1305               if (dfill) {
1306                 for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1307               } else {
1308                 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1309               }
1310             }
1311           }
1312         }
1313         row    = k + nc*(slot);
1314         maxcnt = PetscMax(maxcnt,cnt);
1315         if (removedups) {
1316           ierr   = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1317         } else {
1318           ierr   = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1319         }
1320       }
1321     }
1322   }
1323   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1324   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1325   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1326   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1327 
1328   /*
1329     For each node in the grid: we get the neighbors in the local (on processor ordering
1330     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1331     PETSc ordering.
1332   */
1333   if (!da->prealloc_only) {
1334     for (i=xs; i<xs+nx; i++) {
1335 
1336       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1337       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1338 
1339       for (j=ys; j<ys+ny; j++) {
1340         slot = i - gxs + gnx*(j - gys);
1341 
1342         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1343         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1344 
1345         for (k=0; k<nc; k++) {
1346           cnt = 0;
1347           for (l=lstart; l<lend+1; l++) {
1348             for (p=pstart; p<pend+1; p++) {
1349               if (l || p) {
1350                 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
1351                   for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
1352                 }
1353               } else {
1354                 if (dfill) {
1355                   for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1356                 } else {
1357                   for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1358                 }
1359               }
1360             }
1361           }
1362           row  = k + nc*(slot);
1363           ierr = MatSetValuesLocal(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1364         }
1365       }
1366     }
1367     /* do not copy values to GPU since they are all zero and not yet needed there */
1368     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1369     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1370     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1371     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1372     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1373   }
1374   ierr = PetscFree(cols);CHKERRQ(ierr);
1375   PetscFunctionReturn(0);
1376 }
1377 
1378 /* ---------------------------------------------------------------------------------*/
1379 
1380 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J,PetscBool isIS)
1381 {
1382   PetscErrorCode         ierr;
1383   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1384   PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
1385   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
1386   MPI_Comm               comm;
1387   DMBoundaryType         bx,by,bz;
1388   ISLocalToGlobalMapping ltog,mltog;
1389   DMDAStencilType        st;
1390   PetscBool              removedups = PETSC_FALSE;
1391 
1392   PetscFunctionBegin;
1393   /*
1394          nc - number of components per grid point
1395          col - number of colors needed in one direction for single component problem
1396 
1397   */
1398   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1399   if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) {
1400     ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr);
1401   }
1402   col  = 2*s + 1;
1403 
1404   /*
1405        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1406        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1407   */
1408   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1409   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1410   if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;
1411 
1412   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1413   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1414   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1415 
1416   ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr);
1417   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1418 
1419   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1420   /* determine the matrix preallocation information */
1421   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1422   for (i=xs; i<xs+nx; i++) {
1423     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1424     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1425     for (j=ys; j<ys+ny; j++) {
1426       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1427       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1428       for (k=zs; k<zs+nz; k++) {
1429         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1430         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1431 
1432         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1433 
1434         cnt = 0;
1435         for (l=0; l<nc; l++) {
1436           for (ii=istart; ii<iend+1; ii++) {
1437             for (jj=jstart; jj<jend+1; jj++) {
1438               for (kk=kstart; kk<kend+1; kk++) {
1439                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1440                   cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1441                 }
1442               }
1443             }
1444           }
1445           rows[l] = l + nc*(slot);
1446         }
1447         if (removedups) {
1448           ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1449         } else {
1450           ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1451         }
1452       }
1453     }
1454   }
1455   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1456   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1457   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1458   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1459   ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
1460   if (!mltog) {
1461     ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1462   }
1463 
1464   /*
1465     For each node in the grid: we get the neighbors in the local (on processor ordering
1466     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1467     PETSc ordering.
1468   */
1469   if (!da->prealloc_only) {
1470     for (i=xs; i<xs+nx; i++) {
1471       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1472       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1473       for (j=ys; j<ys+ny; j++) {
1474         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1475         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1476         for (k=zs; k<zs+nz; k++) {
1477           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1478           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1479 
1480           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1481 
1482           cnt = 0;
1483           for (kk=kstart; kk<kend+1; kk++) {
1484             for (jj=jstart; jj<jend+1; jj++) {
1485               for (ii=istart; ii<iend+1; ii++) {
1486                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1487                   cols[cnt++] = nc*(slot + ii + gnx*jj + gnx*gny*kk);
1488                     for (l=1; l<nc; l++) {
1489                       cols[cnt] = 1 + cols[cnt-1];cnt++;
1490                   }
1491                 }
1492               }
1493             }
1494           }
1495           rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
1496           ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1497         }
1498       }
1499     }
1500     /* do not copy values to GPU since they are all zero and not yet needed there */
1501     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1502     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1503     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1504     if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) {
1505       ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
1506     }
1507     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1508     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1509   }
1510   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1511   PetscFunctionReturn(0);
1512 }
1513 
1514 /* ---------------------------------------------------------------------------------*/
1515 
1516 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J)
1517 {
1518   PetscErrorCode         ierr;
1519   DM_DA                  *dd = (DM_DA*)da->data;
1520   PetscInt               xs,nx,i,j,gxs,gnx,row,k,l;
1521   PetscInt               m,dim,s,*cols = NULL,nc,cnt,maxcnt = 0,*ocols;
1522   PetscInt               *ofill = dd->ofill,*dfill = dd->dfill;
1523   DMBoundaryType         bx;
1524   ISLocalToGlobalMapping ltog;
1525   PetscMPIInt            rank,size;
1526 
1527   PetscFunctionBegin;
1528   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)da),&rank);CHKERRMPI(ierr);
1529   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRMPI(ierr);
1530 
1531   /*
1532          nc - number of components per grid point
1533 
1534   */
1535   ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr);
1536   if (s > 1) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Matrix creation for 1d not implemented correctly for stencil width larger than 1");
1537   ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr);
1538   ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr);
1539 
1540   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1541   ierr = PetscCalloc2(nx*nc,&cols,nx*nc,&ocols);CHKERRQ(ierr);
1542 
1543   /*
1544         note should be smaller for first and last process with no periodic
1545         does not handle dfill
1546   */
1547   cnt = 0;
1548   /* coupling with process to the left */
1549   for (i=0; i<s; i++) {
1550     for (j=0; j<nc; j++) {
1551       ocols[cnt] = ((rank == 0) ? 0 : (s - i)*(ofill[j+1] - ofill[j]));
1552       cols[cnt]  = dfill[j+1] - dfill[j] + (s + i)*(ofill[j+1] - ofill[j]);
1553       if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1554         if (size > 1) ocols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
1555         else cols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
1556       }
1557       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1558       cnt++;
1559     }
1560   }
1561   for (i=s; i<nx-s; i++) {
1562     for (j=0; j<nc; j++) {
1563       cols[cnt] = dfill[j+1] - dfill[j] + 2*s*(ofill[j+1] - ofill[j]);
1564       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1565       cnt++;
1566     }
1567   }
1568   /* coupling with process to the right */
1569   for (i=nx-s; i<nx; i++) {
1570     for (j=0; j<nc; j++) {
1571       ocols[cnt] = ((rank == (size-1)) ? 0 : (i - nx + s + 1)*(ofill[j+1] - ofill[j]));
1572       cols[cnt]  = dfill[j+1] - dfill[j] + (s + nx - i - 1)*(ofill[j+1] - ofill[j]);
1573       if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1574         if (size > 1) ocols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
1575         else cols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
1576       }
1577       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1578       cnt++;
1579     }
1580   }
1581 
1582   ierr = MatSeqAIJSetPreallocation(J,0,cols);CHKERRQ(ierr);
1583   ierr = MatMPIAIJSetPreallocation(J,0,cols,0,ocols);CHKERRQ(ierr);
1584   ierr = PetscFree2(cols,ocols);CHKERRQ(ierr);
1585 
1586   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1587   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1588 
1589   /*
1590     For each node in the grid: we get the neighbors in the local (on processor ordering
1591     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1592     PETSc ordering.
1593   */
1594   if (!da->prealloc_only) {
1595     ierr = PetscMalloc1(maxcnt,&cols);CHKERRQ(ierr);
1596     row = xs*nc;
1597     /* coupling with process to the left */
1598     for (i=xs; i<xs+s; i++) {
1599       for (j=0; j<nc; j++) {
1600         cnt = 0;
1601         if (rank) {
1602           for (l=0; l<s; l++) {
1603             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1604           }
1605         }
1606         if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1607           for (l=0; l<s; l++) {
1608             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (m + i - s - l)*nc + ofill[k];
1609           }
1610         }
1611         if (dfill) {
1612           for (k=dfill[j]; k<dfill[j+1]; k++) {
1613             cols[cnt++] = i*nc + dfill[k];
1614           }
1615         } else {
1616           for (k=0; k<nc; k++) {
1617             cols[cnt++] = i*nc + k;
1618           }
1619         }
1620         for (l=0; l<s; l++) {
1621           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1622         }
1623         ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1624         row++;
1625       }
1626     }
1627     for (i=xs+s; i<xs+nx-s; i++) {
1628       for (j=0; j<nc; j++) {
1629         cnt = 0;
1630         for (l=0; l<s; l++) {
1631           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1632         }
1633         if (dfill) {
1634           for (k=dfill[j]; k<dfill[j+1]; k++) {
1635             cols[cnt++] = i*nc + dfill[k];
1636           }
1637         } else {
1638           for (k=0; k<nc; k++) {
1639             cols[cnt++] = i*nc + k;
1640           }
1641         }
1642         for (l=0; l<s; l++) {
1643           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1644         }
1645         ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1646         row++;
1647       }
1648     }
1649     /* coupling with process to the right */
1650     for (i=xs+nx-s; i<xs+nx; i++) {
1651       for (j=0; j<nc; j++) {
1652         cnt = 0;
1653         for (l=0; l<s; l++) {
1654           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1655         }
1656         if (dfill) {
1657           for (k=dfill[j]; k<dfill[j+1]; k++) {
1658             cols[cnt++] = i*nc + dfill[k];
1659           }
1660         } else {
1661           for (k=0; k<nc; k++) {
1662             cols[cnt++] = i*nc + k;
1663           }
1664         }
1665         if (rank < size-1) {
1666           for (l=0; l<s; l++) {
1667             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1668           }
1669         }
1670         if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1671           for (l=0; l<s; l++) {
1672             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s - l - m + 2)*nc + ofill[k];
1673           }
1674         }
1675         ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1676         row++;
1677       }
1678     }
1679     ierr = PetscFree(cols);CHKERRQ(ierr);
1680     /* do not copy values to GPU since they are all zero and not yet needed there */
1681     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1682     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1683     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1684     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1685     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1686   }
1687   PetscFunctionReturn(0);
1688 }
1689 
1690 /* ---------------------------------------------------------------------------------*/
1691 
1692 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J,PetscBool isIS)
1693 {
1694   PetscErrorCode         ierr;
1695   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1696   PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
1697   PetscInt               istart,iend;
1698   DMBoundaryType         bx;
1699   ISLocalToGlobalMapping ltog,mltog;
1700 
1701   PetscFunctionBegin;
1702   /*
1703          nc - number of components per grid point
1704          col - number of colors needed in one direction for single component problem
1705 
1706   */
1707   ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr);
1708   if (!isIS && bx == DM_BOUNDARY_NONE) {
1709     ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr);
1710   }
1711   col  = 2*s + 1;
1712 
1713   ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr);
1714   ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr);
1715 
1716   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1717   ierr = MatSeqAIJSetPreallocation(J,col*nc,NULL);CHKERRQ(ierr);
1718   ierr = MatMPIAIJSetPreallocation(J,col*nc,NULL,col*nc,NULL);CHKERRQ(ierr);
1719 
1720   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1721   ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
1722   if (!mltog) {
1723     ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1724   }
1725 
1726   /*
1727     For each node in the grid: we get the neighbors in the local (on processor ordering
1728     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1729     PETSc ordering.
1730   */
1731   if (!da->prealloc_only) {
1732     ierr = PetscMalloc2(nc,&rows,col*nc*nc,&cols);CHKERRQ(ierr);
1733     for (i=xs; i<xs+nx; i++) {
1734       istart = PetscMax(-s,gxs - i);
1735       iend   = PetscMin(s,gxs + gnx - i - 1);
1736       slot   = i - gxs;
1737 
1738       cnt = 0;
1739       for (i1=istart; i1<iend+1; i1++) {
1740         cols[cnt++] = nc*(slot + i1);
1741         for (l=1; l<nc; l++) {
1742           cols[cnt] = 1 + cols[cnt-1];cnt++;
1743         }
1744       }
1745       rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
1746       ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1747     }
1748     /* do not copy values to GPU since they are all zero and not yet needed there */
1749     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1750     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1751     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1752     if (!isIS && bx == DM_BOUNDARY_NONE) {
1753       ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
1754     }
1755     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1756     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1757     ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1758   }
1759   PetscFunctionReturn(0);
1760 }
1761 
1762 /* ---------------------------------------------------------------------------------*/
1763 
1764 PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da,Mat J,PetscBool isIS)
1765 {
1766   PetscErrorCode         ierr;
1767   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1768   PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
1769   PetscInt               istart,iend;
1770   DMBoundaryType         bx;
1771   ISLocalToGlobalMapping ltog,mltog;
1772 
1773   PetscFunctionBegin;
1774   /*
1775          nc - number of components per grid point
1776          col - number of colors needed in one direction for single component problem
1777   */
1778   ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr);
1779   col  = 2*s + 1;
1780 
1781   ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr);
1782   ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr);
1783 
1784   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1785   ierr = MatSeqAIJSetTotalPreallocation(J,nx*nc*col*nc);CHKERRQ(ierr);
1786 
1787   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1788   ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
1789   if (!mltog) {
1790     ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1791   }
1792 
1793   /*
1794     For each node in the grid: we get the neighbors in the local (on processor ordering
1795     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1796     PETSc ordering.
1797   */
1798   if (!da->prealloc_only) {
1799     ierr = PetscMalloc2(nc,&rows,col*nc*nc,&cols);CHKERRQ(ierr);
1800     for (i=xs; i<xs+nx; i++) {
1801       istart = PetscMax(-s,gxs - i);
1802       iend   = PetscMin(s,gxs + gnx - i - 1);
1803       slot   = i - gxs;
1804 
1805       cnt = 0;
1806       for (i1=istart; i1<iend+1; i1++) {
1807         cols[cnt++] = nc*(slot + i1);
1808         for (l=1; l<nc; l++) {
1809           cols[cnt] = 1 + cols[cnt-1];cnt++;
1810         }
1811       }
1812       rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
1813       ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1814     }
1815     /* do not copy values to GPU since they are all zero and not yet needed there */
1816     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1817     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1818     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1819     if (!isIS && bx == DM_BOUNDARY_NONE) {
1820       ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
1821     }
1822     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1823     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1824     ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1825   }
1826   ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
1827   PetscFunctionReturn(0);
1828 }
1829 
1830 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
1831 {
1832   PetscErrorCode         ierr;
1833   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1834   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1835   PetscInt               istart,iend,jstart,jend,ii,jj;
1836   MPI_Comm               comm;
1837   PetscScalar            *values;
1838   DMBoundaryType         bx,by;
1839   DMDAStencilType        st;
1840   ISLocalToGlobalMapping ltog;
1841 
1842   PetscFunctionBegin;
1843   /*
1844      nc - number of components per grid point
1845      col - number of colors needed in one direction for single component problem
1846   */
1847   ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
1848   col  = 2*s + 1;
1849 
1850   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
1851   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
1852   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1853 
1854   ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr);
1855 
1856   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1857 
1858   /* determine the matrix preallocation information */
1859   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1860   for (i=xs; i<xs+nx; i++) {
1861     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1862     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1863     for (j=ys; j<ys+ny; j++) {
1864       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1865       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1866       slot   = i - gxs + gnx*(j - gys);
1867 
1868       /* Find block columns in block row */
1869       cnt = 0;
1870       for (ii=istart; ii<iend+1; ii++) {
1871         for (jj=jstart; jj<jend+1; jj++) {
1872           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1873             cols[cnt++] = slot + ii + gnx*jj;
1874           }
1875         }
1876       }
1877       ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1878     }
1879   }
1880   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1881   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1882   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1883 
1884   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1885 
1886   /*
1887     For each node in the grid: we get the neighbors in the local (on processor ordering
1888     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1889     PETSc ordering.
1890   */
1891   if (!da->prealloc_only) {
1892     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
1893     for (i=xs; i<xs+nx; i++) {
1894       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1895       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1896       for (j=ys; j<ys+ny; j++) {
1897         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1898         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1899         slot = i - gxs + gnx*(j - gys);
1900         cnt  = 0;
1901         for (ii=istart; ii<iend+1; ii++) {
1902           for (jj=jstart; jj<jend+1; jj++) {
1903             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1904               cols[cnt++] = slot + ii + gnx*jj;
1905             }
1906           }
1907         }
1908         ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1909       }
1910     }
1911     ierr = PetscFree(values);CHKERRQ(ierr);
1912     /* do not copy values to GPU since they are all zero and not yet needed there */
1913     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1914     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1915     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1916     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1917     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1918   }
1919   ierr = PetscFree(cols);CHKERRQ(ierr);
1920   PetscFunctionReturn(0);
1921 }
1922 
1923 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
1924 {
1925   PetscErrorCode         ierr;
1926   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1927   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1928   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1929   MPI_Comm               comm;
1930   PetscScalar            *values;
1931   DMBoundaryType         bx,by,bz;
1932   DMDAStencilType        st;
1933   ISLocalToGlobalMapping ltog;
1934 
1935   PetscFunctionBegin;
1936   /*
1937          nc - number of components per grid point
1938          col - number of colors needed in one direction for single component problem
1939 
1940   */
1941   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1942   col  = 2*s + 1;
1943 
1944   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1945   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1946   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1947 
1948   ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr);
1949 
1950   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1951 
1952   /* determine the matrix preallocation information */
1953   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1954   for (i=xs; i<xs+nx; i++) {
1955     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1956     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1957     for (j=ys; j<ys+ny; j++) {
1958       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1959       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1960       for (k=zs; k<zs+nz; k++) {
1961         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1962         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1963 
1964         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1965 
1966         /* Find block columns in block row */
1967         cnt = 0;
1968         for (ii=istart; ii<iend+1; ii++) {
1969           for (jj=jstart; jj<jend+1; jj++) {
1970             for (kk=kstart; kk<kend+1; kk++) {
1971               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1972                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1973               }
1974             }
1975           }
1976         }
1977         ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1978       }
1979     }
1980   }
1981   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1982   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1983   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1984 
1985   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1986 
1987   /*
1988     For each node in the grid: we get the neighbors in the local (on processor ordering
1989     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1990     PETSc ordering.
1991   */
1992   if (!da->prealloc_only) {
1993     ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr);
1994     for (i=xs; i<xs+nx; i++) {
1995       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1996       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1997       for (j=ys; j<ys+ny; j++) {
1998         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1999         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2000         for (k=zs; k<zs+nz; k++) {
2001           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2002           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2003 
2004           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2005 
2006           cnt = 0;
2007           for (ii=istart; ii<iend+1; ii++) {
2008             for (jj=jstart; jj<jend+1; jj++) {
2009               for (kk=kstart; kk<kend+1; kk++) {
2010                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2011                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
2012                 }
2013               }
2014             }
2015           }
2016           ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2017         }
2018       }
2019     }
2020     ierr = PetscFree(values);CHKERRQ(ierr);
2021     /* do not copy values to GPU since they are all zero and not yet needed there */
2022     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
2023     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2024     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2025     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
2026     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2027   }
2028   ierr = PetscFree(cols);CHKERRQ(ierr);
2029   PetscFunctionReturn(0);
2030 }
2031 
2032 /*
2033   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
2034   identify in the local ordering with periodic domain.
2035 */
2036 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[])
2037 {
2038   PetscErrorCode ierr;
2039   PetscInt       i,n;
2040 
2041   PetscFunctionBegin;
2042   ierr = ISLocalToGlobalMappingApplyBlock(ltog,1,row,row);CHKERRQ(ierr);
2043   ierr = ISLocalToGlobalMappingApplyBlock(ltog,*cnt,col,col);CHKERRQ(ierr);
2044   for (i=0,n=0; i<*cnt; i++) {
2045     if (col[i] >= *row) col[n++] = col[i];
2046   }
2047   *cnt = n;
2048   PetscFunctionReturn(0);
2049 }
2050 
2051 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J)
2052 {
2053   PetscErrorCode         ierr;
2054   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2055   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
2056   PetscInt               istart,iend,jstart,jend,ii,jj;
2057   MPI_Comm               comm;
2058   PetscScalar            *values;
2059   DMBoundaryType         bx,by;
2060   DMDAStencilType        st;
2061   ISLocalToGlobalMapping ltog;
2062 
2063   PetscFunctionBegin;
2064   /*
2065      nc - number of components per grid point
2066      col - number of colors needed in one direction for single component problem
2067   */
2068   ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
2069   col  = 2*s + 1;
2070 
2071   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
2072   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
2073   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
2074 
2075   ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr);
2076 
2077   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
2078 
2079   /* determine the matrix preallocation information */
2080   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
2081   for (i=xs; i<xs+nx; i++) {
2082     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2083     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2084     for (j=ys; j<ys+ny; j++) {
2085       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2086       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2087       slot   = i - gxs + gnx*(j - gys);
2088 
2089       /* Find block columns in block row */
2090       cnt = 0;
2091       for (ii=istart; ii<iend+1; ii++) {
2092         for (jj=jstart; jj<jend+1; jj++) {
2093           if (st == DMDA_STENCIL_BOX || !ii || !jj) {
2094             cols[cnt++] = slot + ii + gnx*jj;
2095           }
2096         }
2097       }
2098       ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2099       ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
2100     }
2101   }
2102   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
2103   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
2104   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2105 
2106   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
2107 
2108   /*
2109     For each node in the grid: we get the neighbors in the local (on processor ordering
2110     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2111     PETSc ordering.
2112   */
2113   if (!da->prealloc_only) {
2114     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
2115     for (i=xs; i<xs+nx; i++) {
2116       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2117       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2118       for (j=ys; j<ys+ny; j++) {
2119         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2120         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2121         slot   = i - gxs + gnx*(j - gys);
2122 
2123         /* Find block columns in block row */
2124         cnt = 0;
2125         for (ii=istart; ii<iend+1; ii++) {
2126           for (jj=jstart; jj<jend+1; jj++) {
2127             if (st == DMDA_STENCIL_BOX || !ii || !jj) {
2128               cols[cnt++] = slot + ii + gnx*jj;
2129             }
2130           }
2131         }
2132         ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2133         ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2134       }
2135     }
2136     ierr = PetscFree(values);CHKERRQ(ierr);
2137     /* do not copy values to GPU since they are all zero and not yet needed there */
2138     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
2139     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2140     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2141     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
2142     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2143   }
2144   ierr = PetscFree(cols);CHKERRQ(ierr);
2145   PetscFunctionReturn(0);
2146 }
2147 
2148 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J)
2149 {
2150   PetscErrorCode         ierr;
2151   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2152   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
2153   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
2154   MPI_Comm               comm;
2155   PetscScalar            *values;
2156   DMBoundaryType         bx,by,bz;
2157   DMDAStencilType        st;
2158   ISLocalToGlobalMapping ltog;
2159 
2160   PetscFunctionBegin;
2161   /*
2162      nc - number of components per grid point
2163      col - number of colors needed in one direction for single component problem
2164   */
2165   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
2166   col  = 2*s + 1;
2167 
2168   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
2169   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
2170   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
2171 
2172   /* create the matrix */
2173   ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr);
2174 
2175   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
2176 
2177   /* determine the matrix preallocation information */
2178   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
2179   for (i=xs; i<xs+nx; i++) {
2180     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2181     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2182     for (j=ys; j<ys+ny; j++) {
2183       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2184       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2185       for (k=zs; k<zs+nz; k++) {
2186         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2187         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2188 
2189         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2190 
2191         /* Find block columns in block row */
2192         cnt = 0;
2193         for (ii=istart; ii<iend+1; ii++) {
2194           for (jj=jstart; jj<jend+1; jj++) {
2195             for (kk=kstart; kk<kend+1; kk++) {
2196               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
2197                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
2198               }
2199             }
2200           }
2201         }
2202         ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2203         ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
2204       }
2205     }
2206   }
2207   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
2208   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
2209   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2210 
2211   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
2212 
2213   /*
2214     For each node in the grid: we get the neighbors in the local (on processor ordering
2215     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2216     PETSc ordering.
2217   */
2218   if (!da->prealloc_only) {
2219     ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr);
2220     for (i=xs; i<xs+nx; i++) {
2221       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2222       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2223       for (j=ys; j<ys+ny; j++) {
2224         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2225         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2226         for (k=zs; k<zs+nz; k++) {
2227           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2228           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2229 
2230           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2231 
2232           cnt = 0;
2233           for (ii=istart; ii<iend+1; ii++) {
2234             for (jj=jstart; jj<jend+1; jj++) {
2235               for (kk=kstart; kk<kend+1; kk++) {
2236                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
2237                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
2238                 }
2239               }
2240             }
2241           }
2242           ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2243           ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2244         }
2245       }
2246     }
2247     ierr = PetscFree(values);CHKERRQ(ierr);
2248     /* do not copy values to GPU since they are all zero and not yet needed there */
2249     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
2250     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2251     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2252     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
2253     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2254   }
2255   ierr = PetscFree(cols);CHKERRQ(ierr);
2256   PetscFunctionReturn(0);
2257 }
2258 
2259 /* ---------------------------------------------------------------------------------*/
2260 
2261 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J)
2262 {
2263   PetscErrorCode         ierr;
2264   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2265   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt, maxcnt = 0,l,p,*dnz,*onz;
2266   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
2267   DM_DA                  *dd = (DM_DA*)da->data;
2268   PetscInt               ifill_col,*dfill = dd->dfill,*ofill = dd->ofill;
2269   MPI_Comm               comm;
2270   PetscScalar            *values;
2271   DMBoundaryType         bx,by,bz;
2272   ISLocalToGlobalMapping ltog;
2273   DMDAStencilType        st;
2274   PetscBool              removedups = PETSC_FALSE;
2275 
2276   PetscFunctionBegin;
2277   /*
2278          nc - number of components per grid point
2279          col - number of colors needed in one direction for single component problem
2280 
2281   */
2282   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
2283   col  = 2*s + 1;
2284   if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
2285                  by 2*stencil_width + 1\n");
2286   if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
2287                  by 2*stencil_width + 1\n");
2288   if (bz == DM_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
2289                  by 2*stencil_width + 1\n");
2290 
2291   /*
2292        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
2293        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
2294   */
2295   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
2296   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
2297   if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;
2298 
2299   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
2300   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
2301   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
2302 
2303   ierr = PetscMalloc1(col*col*col*nc,&cols);CHKERRQ(ierr);
2304   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
2305 
2306   /* determine the matrix preallocation information */
2307   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
2308 
2309   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
2310   for (i=xs; i<xs+nx; i++) {
2311     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2312     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2313     for (j=ys; j<ys+ny; j++) {
2314       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2315       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2316       for (k=zs; k<zs+nz; k++) {
2317         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2318         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2319 
2320         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2321 
2322         for (l=0; l<nc; l++) {
2323           cnt = 0;
2324           for (ii=istart; ii<iend+1; ii++) {
2325             for (jj=jstart; jj<jend+1; jj++) {
2326               for (kk=kstart; kk<kend+1; kk++) {
2327                 if (ii || jj || kk) {
2328                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2329                     for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2330                   }
2331                 } else {
2332                   if (dfill) {
2333                     for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2334                   } else {
2335                     for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2336                   }
2337                 }
2338               }
2339             }
2340           }
2341           row  = l + nc*(slot);
2342           maxcnt = PetscMax(maxcnt,cnt);
2343           if (removedups) {
2344             ierr = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
2345           } else {
2346             ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
2347           }
2348         }
2349       }
2350     }
2351   }
2352   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
2353   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
2354   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2355   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
2356 
2357   /*
2358     For each node in the grid: we get the neighbors in the local (on processor ordering
2359     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2360     PETSc ordering.
2361   */
2362   if (!da->prealloc_only) {
2363     ierr = PetscCalloc1(maxcnt,&values);CHKERRQ(ierr);
2364     for (i=xs; i<xs+nx; i++) {
2365       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2366       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2367       for (j=ys; j<ys+ny; j++) {
2368         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2369         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2370         for (k=zs; k<zs+nz; k++) {
2371           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2372           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2373 
2374           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2375 
2376           for (l=0; l<nc; l++) {
2377             cnt = 0;
2378             for (ii=istart; ii<iend+1; ii++) {
2379               for (jj=jstart; jj<jend+1; jj++) {
2380                 for (kk=kstart; kk<kend+1; kk++) {
2381                   if (ii || jj || kk) {
2382                     if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2383                       for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2384                     }
2385                   } else {
2386                     if (dfill) {
2387                       for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2388                     } else {
2389                       for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2390                     }
2391                   }
2392                 }
2393               }
2394             }
2395             row  = l + nc*(slot);
2396             ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2397           }
2398         }
2399       }
2400     }
2401     ierr = PetscFree(values);CHKERRQ(ierr);
2402     /* do not copy values to GPU since they are all zero and not yet needed there */
2403     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
2404     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2405     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2406     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
2407     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2408   }
2409   ierr = PetscFree(cols);CHKERRQ(ierr);
2410   PetscFunctionReturn(0);
2411 }
2412