xref: /petsc/src/dm/impls/da/fdda.c (revision 56258e06b274a26fe61da30eb57d3f032be2525c)
1 
2 #include <petsc/private/dmdaimpl.h> /*I      "petscdmda.h"     I*/
3 #include <petscmat.h>
4 
5 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring*);
6 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring*);
7 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring*);
8 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring*);
9 
10 /*
11    For ghost i that may be negative or greater than the upper bound this
12   maps it into the 0:m-1 range using periodicity
13 */
14 #define SetInRange(i,m) ((i < 0) ? m+i : ((i >= m) ? i-m : i))
15 
16 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill,PetscInt w,PetscInt **rfill)
17 {
18   PetscErrorCode ierr;
19   PetscInt       i,j,nz,*fill;
20 
21   PetscFunctionBegin;
22   if (!dfill) PetscFunctionReturn(0);
23 
24   /* count number nonzeros */
25   nz = 0;
26   for (i=0; i<w; i++) {
27     for (j=0; j<w; j++) {
28       if (dfill[w*i+j]) nz++;
29     }
30   }
31   ierr = PetscMalloc1(nz + w + 1,&fill);CHKERRQ(ierr);
32   /* construct modified CSR storage of nonzero structure */
33   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
34    so fill[1] - fill[0] gives number of nonzeros in first row etc */
35   nz = w + 1;
36   for (i=0; i<w; i++) {
37     fill[i] = nz;
38     for (j=0; j<w; j++) {
39       if (dfill[w*i+j]) {
40         fill[nz] = j;
41         nz++;
42       }
43     }
44   }
45   fill[w] = nz;
46 
47   *rfill = fill;
48   PetscFunctionReturn(0);
49 }
50 
51 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse,PetscInt w,PetscInt **rfill)
52 {
53   PetscErrorCode ierr;
54   PetscInt       nz;
55 
56   PetscFunctionBegin;
57   if (!dfillsparse) PetscFunctionReturn(0);
58 
59   /* Determine number of non-zeros */
60   nz = (dfillsparse[w] - w - 1);
61 
62   /* Allocate space for our copy of the given sparse matrix representation. */
63   ierr = PetscMalloc1(nz + w + 1,rfill);CHKERRQ(ierr);
64   ierr = PetscArraycpy(*rfill,dfillsparse,nz+w+1);CHKERRQ(ierr);
65   PetscFunctionReturn(0);
66 }
67 
68 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
69 {
70   PetscErrorCode ierr;
71   PetscInt       i,k,cnt = 1;
72 
73   PetscFunctionBegin;
74 
75   /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of
76    columns to the left with any nonzeros in them plus 1 */
77   ierr = PetscCalloc1(dd->w,&dd->ofillcols);CHKERRQ(ierr);
78   for (i=0; i<dd->w; i++) {
79     for (k=dd->ofill[i]; k<dd->ofill[i+1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
80   }
81   for (i=0; i<dd->w; i++) {
82     if (dd->ofillcols[i]) {
83       dd->ofillcols[i] = cnt++;
84     }
85   }
86   PetscFunctionReturn(0);
87 }
88 
89 /*@
90     DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
91     of the matrix returned by DMCreateMatrix().
92 
93     Logically Collective on da
94 
95     Input Parameters:
96 +   da - the distributed array
97 .   dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
98 -   ofill - the fill pattern in the off-diagonal blocks
99 
100     Level: developer
101 
102     Notes:
103     This only makes sense when you are doing multicomponent problems but using the
104        MPIAIJ matrix format
105 
106            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
107        representing coupling and 0 entries for missing coupling. For example
108 $             dfill[9] = {1, 0, 0,
109 $                         1, 1, 0,
110 $                         0, 1, 1}
111        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
112        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
113        diagonal block).
114 
115      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
116      can be represented in the dfill, ofill format
117 
118    Contributed by Glenn Hammond
119 
120 .seealso DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()
121 
122 @*/
123 PetscErrorCode  DMDASetBlockFills(DM da,const PetscInt *dfill,const PetscInt *ofill)
124 {
125   DM_DA          *dd = (DM_DA*)da->data;
126   PetscErrorCode ierr;
127 
128   PetscFunctionBegin;
129   /* save the given dfill and ofill information */
130   ierr = DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);CHKERRQ(ierr);
131   ierr = DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);CHKERRQ(ierr);
132 
133   /* count nonzeros in ofill columns */
134   ierr = DMDASetBlockFills_Private2(dd);CHKERRQ(ierr);
135 
136   PetscFunctionReturn(0);
137 }
138 
139 /*@
140     DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
141     of the matrix returned by DMCreateMatrix(), using sparse representations
142     of fill patterns.
143 
144     Logically Collective on da
145 
146     Input Parameters:
147 +   da - the distributed array
148 .   dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block)
149 -   ofill - the sparse fill pattern in the off-diagonal blocks
150 
151     Level: developer
152 
153     Notes: This only makes sense when you are doing multicomponent problems but using the
154        MPIAIJ matrix format
155 
156            The format for dfill and ofill is a sparse representation of a
157            dof-by-dof matrix with 1 entries representing coupling and 0 entries
158            for missing coupling.  The sparse representation is a 1 dimensional
159            array of length nz + dof + 1, where nz is the number of non-zeros in
160            the matrix.  The first dof entries in the array give the
161            starting array indices of each row's items in the rest of the array,
162            the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
163            and the remaining nz items give the column indices of each of
164            the 1s within the logical 2D matrix.  Each row's items within
165            the array are the column indices of the 1s within that row
166            of the 2D matrix.  PETSc developers may recognize that this is the
167            same format as that computed by the DMDASetBlockFills_Private()
168            function from a dense 2D matrix representation.
169 
170      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
171      can be represented in the dfill, ofill format
172 
173    Contributed by Philip C. Roth
174 
175 .seealso DMDASetBlockFills(), DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()
176 
177 @*/
178 PetscErrorCode  DMDASetBlockFillsSparse(DM da,const PetscInt *dfillsparse,const PetscInt *ofillsparse)
179 {
180   DM_DA          *dd = (DM_DA*)da->data;
181   PetscErrorCode ierr;
182 
183   PetscFunctionBegin;
184   /* save the given dfill and ofill information */
185   ierr = DMDASetBlockFillsSparse_Private(dfillsparse,dd->w,&dd->dfill);CHKERRQ(ierr);
186   ierr = DMDASetBlockFillsSparse_Private(ofillsparse,dd->w,&dd->ofill);CHKERRQ(ierr);
187 
188   /* count nonzeros in ofill columns */
189   ierr = DMDASetBlockFills_Private2(dd);CHKERRQ(ierr);
190 
191   PetscFunctionReturn(0);
192 }
193 
194 PetscErrorCode  DMCreateColoring_DA(DM da,ISColoringType ctype,ISColoring *coloring)
195 {
196   PetscErrorCode   ierr;
197   PetscInt         dim,m,n,p,nc;
198   DMBoundaryType   bx,by,bz;
199   MPI_Comm         comm;
200   PetscMPIInt      size;
201   PetscBool        isBAIJ;
202   DM_DA            *dd = (DM_DA*)da->data;
203 
204   PetscFunctionBegin;
205   /*
206                                   m
207           ------------------------------------------------------
208          |                                                     |
209          |                                                     |
210          |               ----------------------                |
211          |               |                    |                |
212       n  |           yn  |                    |                |
213          |               |                    |                |
214          |               .---------------------                |
215          |             (xs,ys)     xn                          |
216          |            .                                        |
217          |         (gxs,gys)                                   |
218          |                                                     |
219           -----------------------------------------------------
220   */
221 
222   /*
223          nc - number of components per grid point
224          col - number of colors needed in one direction for single component problem
225 
226   */
227   ierr = DMDAGetInfo(da,&dim,NULL,NULL,NULL,&m,&n,&p,&nc,NULL,&bx,&by,&bz,NULL);CHKERRQ(ierr);
228 
229   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
230   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
231   if (ctype == IS_COLORING_LOCAL) {
232     if (size == 1) {
233       ctype = IS_COLORING_GLOBAL;
234     } else if (dim > 1) {
235       if ((m==1 && bx == DM_BOUNDARY_PERIODIC) || (n==1 && by == DM_BOUNDARY_PERIODIC) || (p==1 && bz == DM_BOUNDARY_PERIODIC)) {
236         SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain  on the same process");
237       }
238     }
239   }
240 
241   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
242      matrices is for the blocks, not the individual matrix elements  */
243   ierr = PetscStrbeginswith(da->mattype,MATBAIJ,&isBAIJ);CHKERRQ(ierr);
244   if (!isBAIJ) {ierr = PetscStrbeginswith(da->mattype,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);}
245   if (!isBAIJ) {ierr = PetscStrbeginswith(da->mattype,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);}
246   if (isBAIJ) {
247     dd->w  = 1;
248     dd->xs = dd->xs/nc;
249     dd->xe = dd->xe/nc;
250     dd->Xs = dd->Xs/nc;
251     dd->Xe = dd->Xe/nc;
252   }
253 
254   /*
255      We do not provide a getcoloring function in the DMDA operations because
256    the basic DMDA does not know about matrices. We think of DMDA as being
257    more low-level then matrices.
258   */
259   if (dim == 1) {
260     ierr = DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
261   } else if (dim == 2) {
262     ierr = DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
263   } else if (dim == 3) {
264     ierr = DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
265   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
266   if (isBAIJ) {
267     dd->w  = nc;
268     dd->xs = dd->xs*nc;
269     dd->xe = dd->xe*nc;
270     dd->Xs = dd->Xs*nc;
271     dd->Xe = dd->Xe*nc;
272   }
273   PetscFunctionReturn(0);
274 }
275 
276 /* ---------------------------------------------------------------------------------*/
277 
278 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
279 {
280   PetscErrorCode  ierr;
281   PetscInt        xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
282   PetscInt        ncolors;
283   MPI_Comm        comm;
284   DMBoundaryType  bx,by;
285   DMDAStencilType st;
286   ISColoringValue *colors;
287   DM_DA           *dd = (DM_DA*)da->data;
288 
289   PetscFunctionBegin;
290   /*
291          nc - number of components per grid point
292          col - number of colors needed in one direction for single component problem
293 
294   */
295   ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,&M,&N,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
296   col  = 2*s + 1;
297   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
298   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
299   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
300 
301   /* special case as taught to us by Paul Hovland */
302   if (st == DMDA_STENCIL_STAR && s == 1) {
303     ierr = DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
304   } else {
305     if (ctype == IS_COLORING_GLOBAL) {
306       if (!dd->localcoloring) {
307         ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr);
308         ii   = 0;
309         for (j=ys; j<ys+ny; j++) {
310           for (i=xs; i<xs+nx; i++) {
311             for (k=0; k<nc; k++) {
312               colors[ii++] = k + nc*((i % col) + col*(j % col));
313             }
314           }
315         }
316         ncolors = nc + nc*(col-1 + col*(col-1));
317         ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
318       }
319       *coloring = dd->localcoloring;
320     } else if (ctype == IS_COLORING_LOCAL) {
321       if (!dd->ghostedcoloring) {
322         ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr);
323         ii   = 0;
324         for (j=gys; j<gys+gny; j++) {
325           for (i=gxs; i<gxs+gnx; i++) {
326             for (k=0; k<nc; k++) {
327               /* the complicated stuff is to handle periodic boundaries */
328               colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
329             }
330           }
331         }
332         ncolors = nc + nc*(col - 1 + col*(col-1));
333         ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
334         /* PetscIntView(ncolors,(PetscInt*)colors,0); */
335 
336         ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
337       }
338       *coloring = dd->ghostedcoloring;
339     } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
340   }
341   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
342   PetscFunctionReturn(0);
343 }
344 
345 /* ---------------------------------------------------------------------------------*/
346 
347 PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
348 {
349   PetscErrorCode  ierr;
350   PetscInt        xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
351   PetscInt        ncolors;
352   MPI_Comm        comm;
353   DMBoundaryType  bx,by,bz;
354   DMDAStencilType st;
355   ISColoringValue *colors;
356   DM_DA           *dd = (DM_DA*)da->data;
357 
358   PetscFunctionBegin;
359   /*
360          nc - number of components per grid point
361          col - number of colors needed in one direction for single component problem
362 
363   */
364   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
365   col  = 2*s + 1;
366   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
367   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
368   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
369 
370   /* create the coloring */
371   if (ctype == IS_COLORING_GLOBAL) {
372     if (!dd->localcoloring) {
373       ierr = PetscMalloc1(nc*nx*ny*nz,&colors);CHKERRQ(ierr);
374       ii   = 0;
375       for (k=zs; k<zs+nz; k++) {
376         for (j=ys; j<ys+ny; j++) {
377           for (i=xs; i<xs+nx; i++) {
378             for (l=0; l<nc; l++) {
379               colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
380             }
381           }
382         }
383       }
384       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
385       ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
386     }
387     *coloring = dd->localcoloring;
388   } else if (ctype == IS_COLORING_LOCAL) {
389     if (!dd->ghostedcoloring) {
390       ierr = PetscMalloc1(nc*gnx*gny*gnz,&colors);CHKERRQ(ierr);
391       ii   = 0;
392       for (k=gzs; k<gzs+gnz; k++) {
393         for (j=gys; j<gys+gny; j++) {
394           for (i=gxs; i<gxs+gnx; i++) {
395             for (l=0; l<nc; l++) {
396               /* the complicated stuff is to handle periodic boundaries */
397               colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
398             }
399           }
400         }
401       }
402       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
403       ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
404       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
405     }
406     *coloring = dd->ghostedcoloring;
407   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
408   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
409   PetscFunctionReturn(0);
410 }
411 
412 /* ---------------------------------------------------------------------------------*/
413 
414 PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
415 {
416   PetscErrorCode  ierr;
417   PetscInt        xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
418   PetscInt        ncolors;
419   MPI_Comm        comm;
420   DMBoundaryType  bx;
421   ISColoringValue *colors;
422   DM_DA           *dd = (DM_DA*)da->data;
423 
424   PetscFunctionBegin;
425   /*
426          nc - number of components per grid point
427          col - number of colors needed in one direction for single component problem
428 
429   */
430   ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,&M,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr);
431   col  = 2*s + 1;
432   ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr);
433   ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr);
434   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
435 
436   /* create the coloring */
437   if (ctype == IS_COLORING_GLOBAL) {
438     if (!dd->localcoloring) {
439       ierr = PetscMalloc1(nc*nx,&colors);CHKERRQ(ierr);
440       if (dd->ofillcols) {
441         PetscInt tc = 0;
442         for (i=0; i<nc; i++) tc += (PetscInt) (dd->ofillcols[i] > 0);
443         i1 = 0;
444         for (i=xs; i<xs+nx; i++) {
445           for (l=0; l<nc; l++) {
446             if (dd->ofillcols[l] && (i % col)) {
447               colors[i1++] =  nc - 1 + tc*((i % col) - 1) + dd->ofillcols[l];
448             } else {
449               colors[i1++] = l;
450             }
451           }
452         }
453         ncolors = nc + 2*s*tc;
454       } else {
455         i1 = 0;
456         for (i=xs; i<xs+nx; i++) {
457           for (l=0; l<nc; l++) {
458             colors[i1++] = l + nc*(i % col);
459           }
460         }
461         ncolors = nc + nc*(col-1);
462       }
463       ierr = ISColoringCreate(comm,ncolors,nc*nx,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
464     }
465     *coloring = dd->localcoloring;
466   } else if (ctype == IS_COLORING_LOCAL) {
467     if (!dd->ghostedcoloring) {
468       ierr = PetscMalloc1(nc*gnx,&colors);CHKERRQ(ierr);
469       i1   = 0;
470       for (i=gxs; i<gxs+gnx; i++) {
471         for (l=0; l<nc; l++) {
472           /* the complicated stuff is to handle periodic boundaries */
473           colors[i1++] = l + nc*(SetInRange(i,m) % col);
474         }
475       }
476       ncolors = nc + nc*(col-1);
477       ierr    = ISColoringCreate(comm,ncolors,nc*gnx,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
478       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
479     }
480     *coloring = dd->ghostedcoloring;
481   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
482   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
483   PetscFunctionReturn(0);
484 }
485 
486 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
487 {
488   PetscErrorCode  ierr;
489   PetscInt        xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
490   PetscInt        ncolors;
491   MPI_Comm        comm;
492   DMBoundaryType  bx,by;
493   ISColoringValue *colors;
494   DM_DA           *dd = (DM_DA*)da->data;
495 
496   PetscFunctionBegin;
497   /*
498          nc - number of components per grid point
499          col - number of colors needed in one direction for single component problem
500 
501   */
502   ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,NULL);CHKERRQ(ierr);
503   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
504   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
505   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
506   /* create the coloring */
507   if (ctype == IS_COLORING_GLOBAL) {
508     if (!dd->localcoloring) {
509       ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr);
510       ii   = 0;
511       for (j=ys; j<ys+ny; j++) {
512         for (i=xs; i<xs+nx; i++) {
513           for (k=0; k<nc; k++) {
514             colors[ii++] = k + nc*((3*j+i) % 5);
515           }
516         }
517       }
518       ncolors = 5*nc;
519       ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
520     }
521     *coloring = dd->localcoloring;
522   } else if (ctype == IS_COLORING_LOCAL) {
523     if (!dd->ghostedcoloring) {
524       ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr);
525       ii = 0;
526       for (j=gys; j<gys+gny; j++) {
527         for (i=gxs; i<gxs+gnx; i++) {
528           for (k=0; k<nc; k++) {
529             colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
530           }
531         }
532       }
533       ncolors = 5*nc;
534       ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
535       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
536     }
537     *coloring = dd->ghostedcoloring;
538   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
539   PetscFunctionReturn(0);
540 }
541 
542 /* =========================================================================== */
543 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat,PetscBool);
544 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM,Mat);
545 extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM,Mat,PetscBool);
546 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat,PetscBool);
547 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat);
548 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat,PetscBool);
549 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat);
550 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat);
551 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat);
552 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat);
553 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat);
554 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM,Mat);
555 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM,Mat);
556 extern PetscErrorCode DMCreateMatrix_DA_IS(DM,Mat);
557 
558 /*@C
559    MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix
560 
561    Logically Collective on mat
562 
563    Input Parameters:
564 +  mat - the matrix
565 -  da - the da
566 
567    Level: intermediate
568 
569 @*/
570 PetscErrorCode MatSetupDM(Mat mat,DM da)
571 {
572   PetscErrorCode ierr;
573 
574   PetscFunctionBegin;
575   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
576   PetscValidHeaderSpecificType(da,DM_CLASSID,2,DMDA);
577   ierr = PetscTryMethod(mat,"MatSetupDM_C",(Mat,DM),(mat,da));CHKERRQ(ierr);
578   PetscFunctionReturn(0);
579 }
580 
581 PetscErrorCode  MatView_MPI_DA(Mat A,PetscViewer viewer)
582 {
583   DM                da;
584   PetscErrorCode    ierr;
585   const char        *prefix;
586   Mat               Anatural;
587   AO                ao;
588   PetscInt          rstart,rend,*petsc,i;
589   IS                is;
590   MPI_Comm          comm;
591   PetscViewerFormat format;
592 
593   PetscFunctionBegin;
594   /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
595   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
596   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0);
597 
598   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
599   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
600   if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
601 
602   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
603   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
604   ierr = PetscMalloc1(rend-rstart,&petsc);CHKERRQ(ierr);
605   for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
606   ierr = AOApplicationToPetsc(ao,rend-rstart,petsc);CHKERRQ(ierr);
607   ierr = ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
608 
609   /* call viewer on natural ordering */
610   ierr = MatCreateSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);CHKERRQ(ierr);
611   ierr = ISDestroy(&is);CHKERRQ(ierr);
612   ierr = PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);CHKERRQ(ierr);
613   ierr = PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);CHKERRQ(ierr);
614   ierr = PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);CHKERRQ(ierr);
615   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
616   ierr = MatView(Anatural,viewer);CHKERRQ(ierr);
617   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
618   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
619   PetscFunctionReturn(0);
620 }
621 
622 PetscErrorCode  MatLoad_MPI_DA(Mat A,PetscViewer viewer)
623 {
624   DM             da;
625   PetscErrorCode ierr;
626   Mat            Anatural,Aapp;
627   AO             ao;
628   PetscInt       rstart,rend,*app,i,m,n,M,N;
629   IS             is;
630   MPI_Comm       comm;
631 
632   PetscFunctionBegin;
633   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
634   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
635   if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
636 
637   /* Load the matrix in natural ordering */
638   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Anatural);CHKERRQ(ierr);
639   ierr = MatSetType(Anatural,((PetscObject)A)->type_name);CHKERRQ(ierr);
640   ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr);
641   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
642   ierr = MatSetSizes(Anatural,m,n,M,N);CHKERRQ(ierr);
643   ierr = MatLoad(Anatural,viewer);CHKERRQ(ierr);
644 
645   /* Map natural ordering to application ordering and create IS */
646   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
647   ierr = MatGetOwnershipRange(Anatural,&rstart,&rend);CHKERRQ(ierr);
648   ierr = PetscMalloc1(rend-rstart,&app);CHKERRQ(ierr);
649   for (i=rstart; i<rend; i++) app[i-rstart] = i;
650   ierr = AOPetscToApplication(ao,rend-rstart,app);CHKERRQ(ierr);
651   ierr = ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
652 
653   /* Do permutation and replace header */
654   ierr = MatCreateSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);CHKERRQ(ierr);
655   ierr = MatHeaderReplace(A,&Aapp);CHKERRQ(ierr);
656   ierr = ISDestroy(&is);CHKERRQ(ierr);
657   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
658   PetscFunctionReturn(0);
659 }
660 
661 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
662 {
663   PetscErrorCode ierr;
664   PetscInt       dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
665   Mat            A;
666   MPI_Comm       comm;
667   MatType        Atype;
668   void           (*aij)(void)=NULL,(*baij)(void)=NULL,(*sbaij)(void)=NULL,(*sell)(void)=NULL,(*is)(void)=NULL;
669   MatType        mtype;
670   PetscMPIInt    size;
671   DM_DA          *dd = (DM_DA*)da->data;
672 
673   PetscFunctionBegin;
674   ierr = MatInitializePackage();CHKERRQ(ierr);
675   mtype = da->mattype;
676 
677   /*
678                                   m
679           ------------------------------------------------------
680          |                                                     |
681          |                                                     |
682          |               ----------------------                |
683          |               |                    |                |
684       n  |           ny  |                    |                |
685          |               |                    |                |
686          |               .---------------------                |
687          |             (xs,ys)     nx                          |
688          |            .                                        |
689          |         (gxs,gys)                                   |
690          |                                                     |
691           -----------------------------------------------------
692   */
693 
694   /*
695          nc - number of components per grid point
696          col - number of colors needed in one direction for single component problem
697 
698   */
699   M   = dd->M;
700   N   = dd->N;
701   P   = dd->P;
702   dim = da->dim;
703   dof = dd->w;
704   /* ierr = DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr); */
705   ierr = DMDAGetCorners(da,NULL,NULL,NULL,&nx,&ny,&nz);CHKERRQ(ierr);
706   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
707   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
708   ierr = MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);CHKERRQ(ierr);
709   ierr = MatSetType(A,mtype);CHKERRQ(ierr);
710   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
711   if (dof*nx*ny*nz < da->bind_below) {ierr = MatBindToCPU(A,PETSC_TRUE);CHKERRQ(ierr);}
712   ierr = MatSetDM(A,da);CHKERRQ(ierr);
713   if (da->structure_only) {
714     ierr = MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE);CHKERRQ(ierr);
715   }
716   ierr = MatGetType(A,&Atype);CHKERRQ(ierr);
717   /*
718      We do not provide a getmatrix function in the DMDA operations because
719    the basic DMDA does not know about matrices. We think of DMDA as being more
720    more low-level than matrices. This is kind of cheating but, cause sometimes
721    we think of DMDA has higher level than matrices.
722 
723      We could switch based on Atype (or mtype), but we do not since the
724    specialized setting routines depend only on the particular preallocation
725    details of the matrix, not the type itself.
726   */
727   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
728   if (!aij) {
729     ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
730   }
731   if (!aij) {
732     ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
733     if (!baij) {
734       ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
735     }
736     if (!baij) {
737       ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
738       if (!sbaij) {
739         ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
740       }
741       if (!sbaij) {
742         ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISELLSetPreallocation_C",&sell);CHKERRQ(ierr);
743         if (!sell) {
744           ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",&sell);CHKERRQ(ierr);
745         }
746       }
747       if (!sell) {
748         ierr = PetscObjectQueryFunction((PetscObject)A,"MatISSetPreallocation_C",&is);CHKERRQ(ierr);
749       }
750     }
751   }
752   if (aij) {
753     if (dim == 1) {
754       if (dd->ofill) {
755         ierr = DMCreateMatrix_DA_1d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
756       } else {
757         DMBoundaryType bx;
758         PetscMPIInt  size;
759         ierr = DMDAGetInfo(da,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&bx,NULL,NULL,NULL);CHKERRQ(ierr);
760         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRMPI(ierr);
761         if (size == 1 && bx == DM_BOUNDARY_NONE) {
762           ierr = DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da,A,PETSC_FALSE);CHKERRQ(ierr);
763         } else {
764           ierr = DMCreateMatrix_DA_1d_MPIAIJ(da,A,PETSC_FALSE);CHKERRQ(ierr);
765         }
766       }
767     } else if (dim == 2) {
768       if (dd->ofill) {
769         ierr = DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
770       } else {
771         ierr = DMCreateMatrix_DA_2d_MPIAIJ(da,A,PETSC_FALSE);CHKERRQ(ierr);
772       }
773     } else if (dim == 3) {
774       if (dd->ofill) {
775         ierr = DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
776       } else {
777         ierr = DMCreateMatrix_DA_3d_MPIAIJ(da,A,PETSC_FALSE);CHKERRQ(ierr);
778       }
779     }
780   } else if (baij) {
781     if (dim == 2) {
782       ierr = DMCreateMatrix_DA_2d_MPIBAIJ(da,A);CHKERRQ(ierr);
783     } else if (dim == 3) {
784       ierr = DMCreateMatrix_DA_3d_MPIBAIJ(da,A);CHKERRQ(ierr);
785     } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
786   } else if (sbaij) {
787     if (dim == 2) {
788       ierr = DMCreateMatrix_DA_2d_MPISBAIJ(da,A);CHKERRQ(ierr);
789     } else if (dim == 3) {
790       ierr = DMCreateMatrix_DA_3d_MPISBAIJ(da,A);CHKERRQ(ierr);
791     } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
792   } else if (sell) {
793      if (dim == 2) {
794        ierr = DMCreateMatrix_DA_2d_MPISELL(da,A);CHKERRQ(ierr);
795      } else if (dim == 3) {
796        ierr = DMCreateMatrix_DA_3d_MPISELL(da,A);CHKERRQ(ierr);
797      } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
798   } else if (is) {
799     ierr = DMCreateMatrix_DA_IS(da,A);CHKERRQ(ierr);
800   } else {
801     ISLocalToGlobalMapping ltog;
802 
803     ierr = MatSetBlockSize(A,dof);CHKERRQ(ierr);
804     ierr = MatSetUp(A);CHKERRQ(ierr);
805     ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
806     ierr = MatSetLocalToGlobalMapping(A,ltog,ltog);CHKERRQ(ierr);
807   }
808   ierr = DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);CHKERRQ(ierr);
809   ierr = MatSetStencil(A,dim,dims,starts,dof);CHKERRQ(ierr);
810   ierr = MatSetDM(A,da);CHKERRQ(ierr);
811   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
812   if (size > 1) {
813     /* change viewer to display matrix in natural ordering */
814     ierr = MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA);CHKERRQ(ierr);
815     ierr = MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA);CHKERRQ(ierr);
816   }
817   *J = A;
818   PetscFunctionReturn(0);
819 }
820 
821 /* ---------------------------------------------------------------------------------*/
822 PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
823 
824 PetscErrorCode DMCreateMatrix_DA_IS(DM dm,Mat J)
825 {
826   DM_DA                  *da = (DM_DA*)dm->data;
827   Mat                    lJ;
828   ISLocalToGlobalMapping ltog;
829   IS                     is_loc_filt, is_glob;
830   const PetscInt         *e_loc,*idx;
831   PetscInt               nel,nen,nv,dof,dim,*gidx,nb;
832   PetscBool              flg;
833   PetscErrorCode         ierr;
834 
835   /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
836      We need to filter the local indices that are represented through the DMDAGetElements decomposition
837      This is because the size of the local matrices in MATIS is the local size of the l2g map */
838   PetscFunctionBegin;
839   dof  = da->w;
840   dim  = dm->dim;
841 
842   ierr = MatSetBlockSize(J,dof);CHKERRQ(ierr);
843 
844   /* get local elements indices in local DMDA numbering */
845   ierr = DMDAGetElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
846   ierr = ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nel*nen,e_loc,PETSC_COPY_VALUES,&is_loc_filt);CHKERRQ(ierr);
847   ierr = DMDARestoreElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr);
848 
849   /* obtain a consistent local ordering for MATIS */
850   ierr = ISSortRemoveDups(is_loc_filt);CHKERRQ(ierr);
851   ierr = ISBlockGetLocalSize(is_loc_filt,&nb);CHKERRQ(ierr);
852   ierr = DMGetLocalToGlobalMapping(dm,&ltog);CHKERRQ(ierr);
853   ierr = ISLocalToGlobalMappingGetSize(ltog,&nv);CHKERRQ(ierr);
854   ierr = PetscMalloc1(PetscMax(nb,nv/dof),&gidx);CHKERRQ(ierr);
855   ierr = ISBlockGetIndices(is_loc_filt,&idx);CHKERRQ(ierr);
856   ierr = ISLocalToGlobalMappingApplyBlock(ltog,nb,idx,gidx);CHKERRQ(ierr);
857   ierr = ISBlockRestoreIndices(is_loc_filt,&idx);CHKERRQ(ierr);
858   ierr = ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nb,gidx,PETSC_USE_POINTER,&is_glob);CHKERRQ(ierr);
859   ierr = ISLocalToGlobalMappingCreateIS(is_glob,&ltog);CHKERRQ(ierr);
860   ierr = ISDestroy(&is_glob);CHKERRQ(ierr);
861   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
862   ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
863 
864   /* We also attach a l2g map to the local matrices to have MatSetValueLocal to work */
865   ierr = MatISGetLocalMat(J,&lJ);CHKERRQ(ierr);
866   ierr = ISLocalToGlobalMappingCreateIS(is_loc_filt,&ltog);CHKERRQ(ierr);
867   ierr = ISDestroy(&is_loc_filt);CHKERRQ(ierr);
868   ierr = ISCreateStride(PetscObjectComm((PetscObject)lJ),nv/dof,0,1,&is_glob);CHKERRQ(ierr);
869   ierr = ISGetIndices(is_glob,&idx);CHKERRQ(ierr);
870   ierr = ISGlobalToLocalMappingApplyBlock(ltog,IS_GTOLM_MASK,nv/dof,idx,&nb,gidx);CHKERRQ(ierr);
871   ierr = ISRestoreIndices(is_glob,&idx);CHKERRQ(ierr);
872   ierr = ISDestroy(&is_glob);CHKERRQ(ierr);
873   ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
874   ierr = ISCreateBlock(PETSC_COMM_SELF,dof,nb,gidx,PETSC_USE_POINTER,&is_loc_filt);CHKERRQ(ierr);
875   ierr = ISLocalToGlobalMappingCreateIS(is_loc_filt,&ltog);CHKERRQ(ierr);
876   ierr = ISDestroy(&is_loc_filt);CHKERRQ(ierr);
877   ierr = MatSetLocalToGlobalMapping(lJ,ltog,ltog);CHKERRQ(ierr);
878   ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
879   ierr = PetscFree(gidx);CHKERRQ(ierr);
880 
881   /* Preallocation (not exact): we reuse the preallocation routines of the assembled version  */
882   flg = dm->prealloc_only;
883   dm->prealloc_only = PETSC_TRUE;
884   switch (dim) {
885   case 1:
886     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr);
887     ierr = DMCreateMatrix_DA_1d_MPIAIJ(dm,J,PETSC_TRUE);CHKERRQ(ierr);
888     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
889     break;
890   case 2:
891     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr);
892     ierr = DMCreateMatrix_DA_2d_MPIAIJ(dm,J,PETSC_TRUE);CHKERRQ(ierr);
893     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
894     break;
895   case 3:
896     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr);
897     ierr = DMCreateMatrix_DA_3d_MPIAIJ(dm,J,PETSC_TRUE);CHKERRQ(ierr);
898     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
899     break;
900   default:
901     SETERRQ1(PetscObjectComm((PetscObject)dm),PETSC_ERR_SUP,"Unhandled dimension %d",dim);
902   }
903   dm->prealloc_only = flg;
904   PetscFunctionReturn(0);
905 }
906 
907 PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da,Mat J)
908 {
909   PetscErrorCode         ierr;
910   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p;
911   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
912   MPI_Comm               comm;
913   PetscScalar            *values;
914   DMBoundaryType         bx,by;
915   ISLocalToGlobalMapping ltog;
916   DMDAStencilType        st;
917 
918   PetscFunctionBegin;
919   /*
920          nc - number of components per grid point
921          col - number of colors needed in one direction for single component problem
922 
923   */
924   ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
925   col  = 2*s + 1;
926   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
927   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
928   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
929 
930   ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr);
931   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
932 
933   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
934   /* determine the matrix preallocation information */
935   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
936   for (i=xs; i<xs+nx; i++) {
937 
938     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
939     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
940 
941     for (j=ys; j<ys+ny; j++) {
942       slot = i - gxs + gnx*(j - gys);
943 
944       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
945       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
946 
947       cnt = 0;
948       for (k=0; k<nc; k++) {
949         for (l=lstart; l<lend+1; l++) {
950           for (p=pstart; p<pend+1; p++) {
951             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
952               cols[cnt++] = k + nc*(slot + gnx*l + p);
953             }
954           }
955         }
956         rows[k] = k + nc*(slot);
957       }
958       ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
959     }
960   }
961   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
962   ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr);
963   ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
964   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
965 
966   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
967 
968   /*
969     For each node in the grid: we get the neighbors in the local (on processor ordering
970     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
971     PETSc ordering.
972   */
973   if (!da->prealloc_only) {
974     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
975     for (i=xs; i<xs+nx; i++) {
976 
977       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
978       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
979 
980       for (j=ys; j<ys+ny; j++) {
981         slot = i - gxs + gnx*(j - gys);
982 
983         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
984         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
985 
986         cnt = 0;
987         for (k=0; k<nc; k++) {
988           for (l=lstart; l<lend+1; l++) {
989             for (p=pstart; p<pend+1; p++) {
990               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
991                 cols[cnt++] = k + nc*(slot + gnx*l + p);
992               }
993             }
994           }
995           rows[k] = k + nc*(slot);
996         }
997         ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
998       }
999     }
1000     ierr = PetscFree(values);CHKERRQ(ierr);
1001     /* do not copy values to GPU since they are all zero and not yet needed there */
1002     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1003     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1004     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1005     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1006     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1007   }
1008   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1009   PetscFunctionReturn(0);
1010 }
1011 
1012 PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da,Mat J)
1013 {
1014   PetscErrorCode         ierr;
1015   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1016   PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
1017   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
1018   MPI_Comm               comm;
1019   PetscScalar            *values;
1020   DMBoundaryType         bx,by,bz;
1021   ISLocalToGlobalMapping ltog;
1022   DMDAStencilType        st;
1023 
1024   PetscFunctionBegin;
1025   /*
1026          nc - number of components per grid point
1027          col - number of colors needed in one direction for single component problem
1028 
1029   */
1030   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1031   col  = 2*s + 1;
1032   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1033   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1034   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1035 
1036   ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr);
1037   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1038 
1039   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1040   /* determine the matrix preallocation information */
1041   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1042   for (i=xs; i<xs+nx; i++) {
1043     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1044     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1045     for (j=ys; j<ys+ny; j++) {
1046       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1047       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1048       for (k=zs; k<zs+nz; k++) {
1049         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1050         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1051 
1052         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1053 
1054         cnt = 0;
1055         for (l=0; l<nc; l++) {
1056           for (ii=istart; ii<iend+1; ii++) {
1057             for (jj=jstart; jj<jend+1; jj++) {
1058               for (kk=kstart; kk<kend+1; kk++) {
1059                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1060                   cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1061                 }
1062               }
1063             }
1064           }
1065           rows[l] = l + nc*(slot);
1066         }
1067         ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1068       }
1069     }
1070   }
1071   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1072   ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1073   ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1074   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1075   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1076 
1077   /*
1078     For each node in the grid: we get the neighbors in the local (on processor ordering
1079     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1080     PETSc ordering.
1081   */
1082   if (!da->prealloc_only) {
1083     ierr = PetscCalloc1(col*col*col*nc*nc*nc,&values);CHKERRQ(ierr);
1084     for (i=xs; i<xs+nx; i++) {
1085       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1086       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1087       for (j=ys; j<ys+ny; j++) {
1088         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1089         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1090         for (k=zs; k<zs+nz; k++) {
1091           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1092           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1093 
1094           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1095 
1096           cnt = 0;
1097           for (l=0; l<nc; l++) {
1098             for (ii=istart; ii<iend+1; ii++) {
1099               for (jj=jstart; jj<jend+1; jj++) {
1100                 for (kk=kstart; kk<kend+1; kk++) {
1101                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1102                     cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1103                   }
1104                 }
1105               }
1106             }
1107             rows[l] = l + nc*(slot);
1108           }
1109           ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1110         }
1111       }
1112     }
1113     ierr = PetscFree(values);CHKERRQ(ierr);
1114     /* do not copy values to GPU since they are all zero and not yet needed there */
1115     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1116     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1117     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1118     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1119     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1120   }
1121   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1122   PetscFunctionReturn(0);
1123 }
1124 
1125 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J,PetscBool isIS)
1126 {
1127   PetscErrorCode         ierr;
1128   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,M,N;
1129   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
1130   MPI_Comm               comm;
1131   DMBoundaryType         bx,by;
1132   ISLocalToGlobalMapping ltog,mltog;
1133   DMDAStencilType        st;
1134   PetscBool              removedups = PETSC_FALSE,alreadyboundtocpu = PETSC_TRUE;
1135 
1136   PetscFunctionBegin;
1137   /*
1138          nc - number of components per grid point
1139          col - number of colors needed in one direction for single component problem
1140 
1141   */
1142   ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
1143   if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) {
1144     ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr);
1145   }
1146   col  = 2*s + 1;
1147   /*
1148        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1149        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1150   */
1151   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1152   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1153   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
1154   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
1155   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1156 
1157   ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr);
1158   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1159 
1160   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1161   /* determine the matrix preallocation information */
1162   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
1163   for (i=xs; i<xs+nx; i++) {
1164 
1165     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1166     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1167 
1168     for (j=ys; j<ys+ny; j++) {
1169       slot = i - gxs + gnx*(j - gys);
1170 
1171       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1172       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1173 
1174       cnt = 0;
1175       for (k=0; k<nc; k++) {
1176         for (l=lstart; l<lend+1; l++) {
1177           for (p=pstart; p<pend+1; p++) {
1178             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1179               cols[cnt++] = k + nc*(slot + gnx*l + p);
1180             }
1181           }
1182         }
1183         rows[k] = k + nc*(slot);
1184       }
1185       if (removedups) {
1186         ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1187       } else {
1188         ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1189       }
1190     }
1191   }
1192   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1193   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1194   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1195   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1196   ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
1197   if (!mltog) {
1198     ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1199   }
1200 
1201   /*
1202     For each node in the grid: we get the neighbors in the local (on processor ordering
1203     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1204     PETSc ordering.
1205   */
1206   if (!da->prealloc_only) {
1207     for (i=xs; i<xs+nx; i++) {
1208 
1209       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1210       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1211 
1212       for (j=ys; j<ys+ny; j++) {
1213         slot = i - gxs + gnx*(j - gys);
1214 
1215         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1216         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1217 
1218         cnt = 0;
1219         for (l=lstart; l<lend+1; l++) {
1220           for (p=pstart; p<pend+1; p++) {
1221             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1222               cols[cnt++] = nc*(slot + gnx*l + p);
1223               for (k=1; k<nc; k++) {
1224                 cols[cnt] = 1 + cols[cnt-1];cnt++;
1225               }
1226             }
1227           }
1228         }
1229         for (k=0; k<nc; k++) rows[k] = k + nc*(slot);
1230         ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1231       }
1232     }
1233     /* do not copy values to GPU since they are all zero and not yet needed there */
1234     ierr = MatBoundToCPU(J,&alreadyboundtocpu);CHKERRQ(ierr);
1235     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1236     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1237     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1238     if (!alreadyboundtocpu) {ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);}
1239     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1240     if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) {
1241       ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
1242     }
1243   }
1244   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1245   PetscFunctionReturn(0);
1246 }
1247 
1248 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)
1249 {
1250   PetscErrorCode         ierr;
1251   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1252   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,maxcnt = 0,l,p,M,N;
1253   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
1254   DM_DA                  *dd = (DM_DA*)da->data;
1255   PetscInt               ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
1256   MPI_Comm               comm;
1257   DMBoundaryType         bx,by;
1258   ISLocalToGlobalMapping ltog;
1259   DMDAStencilType        st;
1260   PetscBool              removedups = PETSC_FALSE;
1261 
1262   PetscFunctionBegin;
1263   /*
1264          nc - number of components per grid point
1265          col - number of colors needed in one direction for single component problem
1266 
1267   */
1268   ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
1269   col  = 2*s + 1;
1270   /*
1271        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1272        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1273   */
1274   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1275   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1276   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
1277   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
1278   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1279 
1280   ierr = PetscMalloc1(col*col*nc,&cols);CHKERRQ(ierr);
1281   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1282 
1283   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1284   /* determine the matrix preallocation information */
1285   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
1286   for (i=xs; i<xs+nx; i++) {
1287 
1288     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1289     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1290 
1291     for (j=ys; j<ys+ny; j++) {
1292       slot = i - gxs + gnx*(j - gys);
1293 
1294       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1295       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1296 
1297       for (k=0; k<nc; k++) {
1298         cnt = 0;
1299         for (l=lstart; l<lend+1; l++) {
1300           for (p=pstart; p<pend+1; p++) {
1301             if (l || p) {
1302               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
1303                 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
1304               }
1305             } else {
1306               if (dfill) {
1307                 for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1308               } else {
1309                 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1310               }
1311             }
1312           }
1313         }
1314         row    = k + nc*(slot);
1315         maxcnt = PetscMax(maxcnt,cnt);
1316         if (removedups) {
1317           ierr   = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1318         } else {
1319           ierr   = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1320         }
1321       }
1322     }
1323   }
1324   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1325   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1326   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1327   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1328 
1329   /*
1330     For each node in the grid: we get the neighbors in the local (on processor ordering
1331     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1332     PETSc ordering.
1333   */
1334   if (!da->prealloc_only) {
1335     for (i=xs; i<xs+nx; i++) {
1336 
1337       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1338       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1339 
1340       for (j=ys; j<ys+ny; j++) {
1341         slot = i - gxs + gnx*(j - gys);
1342 
1343         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1344         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1345 
1346         for (k=0; k<nc; k++) {
1347           cnt = 0;
1348           for (l=lstart; l<lend+1; l++) {
1349             for (p=pstart; p<pend+1; p++) {
1350               if (l || p) {
1351                 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
1352                   for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
1353                 }
1354               } else {
1355                 if (dfill) {
1356                   for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1357                 } else {
1358                   for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1359                 }
1360               }
1361             }
1362           }
1363           row  = k + nc*(slot);
1364           ierr = MatSetValuesLocal(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1365         }
1366       }
1367     }
1368     /* do not copy values to GPU since they are all zero and not yet needed there */
1369     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1370     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1371     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1372     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1373     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1374   }
1375   ierr = PetscFree(cols);CHKERRQ(ierr);
1376   PetscFunctionReturn(0);
1377 }
1378 
1379 /* ---------------------------------------------------------------------------------*/
1380 
1381 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J,PetscBool isIS)
1382 {
1383   PetscErrorCode         ierr;
1384   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1385   PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
1386   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
1387   MPI_Comm               comm;
1388   DMBoundaryType         bx,by,bz;
1389   ISLocalToGlobalMapping ltog,mltog;
1390   DMDAStencilType        st;
1391   PetscBool              removedups = PETSC_FALSE;
1392 
1393   PetscFunctionBegin;
1394   /*
1395          nc - number of components per grid point
1396          col - number of colors needed in one direction for single component problem
1397 
1398   */
1399   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1400   if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) {
1401     ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr);
1402   }
1403   col  = 2*s + 1;
1404 
1405   /*
1406        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1407        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1408   */
1409   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1410   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1411   if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;
1412 
1413   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1414   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1415   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1416 
1417   ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr);
1418   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1419 
1420   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1421   /* determine the matrix preallocation information */
1422   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1423   for (i=xs; i<xs+nx; i++) {
1424     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1425     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1426     for (j=ys; j<ys+ny; j++) {
1427       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1428       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1429       for (k=zs; k<zs+nz; k++) {
1430         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1431         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1432 
1433         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1434 
1435         cnt = 0;
1436         for (l=0; l<nc; l++) {
1437           for (ii=istart; ii<iend+1; ii++) {
1438             for (jj=jstart; jj<jend+1; jj++) {
1439               for (kk=kstart; kk<kend+1; kk++) {
1440                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1441                   cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1442                 }
1443               }
1444             }
1445           }
1446           rows[l] = l + nc*(slot);
1447         }
1448         if (removedups) {
1449           ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1450         } else {
1451           ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1452         }
1453       }
1454     }
1455   }
1456   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1457   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1458   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1459   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1460   ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
1461   if (!mltog) {
1462     ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1463   }
1464 
1465   /*
1466     For each node in the grid: we get the neighbors in the local (on processor ordering
1467     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1468     PETSc ordering.
1469   */
1470   if (!da->prealloc_only) {
1471     for (i=xs; i<xs+nx; i++) {
1472       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1473       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1474       for (j=ys; j<ys+ny; j++) {
1475         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1476         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1477         for (k=zs; k<zs+nz; k++) {
1478           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1479           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1480 
1481           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1482 
1483           cnt = 0;
1484           for (kk=kstart; kk<kend+1; kk++) {
1485             for (jj=jstart; jj<jend+1; jj++) {
1486               for (ii=istart; ii<iend+1; ii++) {
1487                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1488                   cols[cnt++] = nc*(slot + ii + gnx*jj + gnx*gny*kk);
1489                     for (l=1; l<nc; l++) {
1490                       cols[cnt] = 1 + cols[cnt-1];cnt++;
1491                   }
1492                 }
1493               }
1494             }
1495           }
1496           rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
1497           ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1498         }
1499       }
1500     }
1501     /* do not copy values to GPU since they are all zero and not yet needed there */
1502     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1503     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1504     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1505     if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) {
1506       ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
1507     }
1508     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1509     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1510   }
1511   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1512   PetscFunctionReturn(0);
1513 }
1514 
1515 /* ---------------------------------------------------------------------------------*/
1516 
1517 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J)
1518 {
1519   PetscErrorCode         ierr;
1520   DM_DA                  *dd = (DM_DA*)da->data;
1521   PetscInt               xs,nx,i,j,gxs,gnx,row,k,l;
1522   PetscInt               m,dim,s,*cols = NULL,nc,cnt,maxcnt = 0,*ocols;
1523   PetscInt               *ofill = dd->ofill,*dfill = dd->dfill;
1524   DMBoundaryType         bx;
1525   ISLocalToGlobalMapping ltog;
1526   PetscMPIInt            rank,size;
1527 
1528   PetscFunctionBegin;
1529   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)da),&rank);CHKERRMPI(ierr);
1530   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRMPI(ierr);
1531 
1532   /*
1533          nc - number of components per grid point
1534 
1535   */
1536   ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr);
1537   if (s > 1) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Matrix creation for 1d not implemented correctly for stencil width larger than 1");
1538   ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr);
1539   ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr);
1540 
1541   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1542   ierr = PetscCalloc2(nx*nc,&cols,nx*nc,&ocols);CHKERRQ(ierr);
1543 
1544   /*
1545         note should be smaller for first and last process with no periodic
1546         does not handle dfill
1547   */
1548   cnt = 0;
1549   /* coupling with process to the left */
1550   for (i=0; i<s; i++) {
1551     for (j=0; j<nc; j++) {
1552       ocols[cnt] = ((rank == 0) ? 0 : (s - i)*(ofill[j+1] - ofill[j]));
1553       cols[cnt]  = dfill[j+1] - dfill[j] + (s + i)*(ofill[j+1] - ofill[j]);
1554       if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1555         if (size > 1) ocols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
1556         else cols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
1557       }
1558       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1559       cnt++;
1560     }
1561   }
1562   for (i=s; i<nx-s; i++) {
1563     for (j=0; j<nc; j++) {
1564       cols[cnt] = dfill[j+1] - dfill[j] + 2*s*(ofill[j+1] - ofill[j]);
1565       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1566       cnt++;
1567     }
1568   }
1569   /* coupling with process to the right */
1570   for (i=nx-s; i<nx; i++) {
1571     for (j=0; j<nc; j++) {
1572       ocols[cnt] = ((rank == (size-1)) ? 0 : (i - nx + s + 1)*(ofill[j+1] - ofill[j]));
1573       cols[cnt]  = dfill[j+1] - dfill[j] + (s + nx - i - 1)*(ofill[j+1] - ofill[j]);
1574       if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1575         if (size > 1) ocols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
1576         else cols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
1577       }
1578       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1579       cnt++;
1580     }
1581   }
1582 
1583   ierr = MatSeqAIJSetPreallocation(J,0,cols);CHKERRQ(ierr);
1584   ierr = MatMPIAIJSetPreallocation(J,0,cols,0,ocols);CHKERRQ(ierr);
1585   ierr = PetscFree2(cols,ocols);CHKERRQ(ierr);
1586 
1587   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1588   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1589 
1590   /*
1591     For each node in the grid: we get the neighbors in the local (on processor ordering
1592     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1593     PETSc ordering.
1594   */
1595   if (!da->prealloc_only) {
1596     ierr = PetscMalloc1(maxcnt,&cols);CHKERRQ(ierr);
1597     row = xs*nc;
1598     /* coupling with process to the left */
1599     for (i=xs; i<xs+s; i++) {
1600       for (j=0; j<nc; j++) {
1601         cnt = 0;
1602         if (rank) {
1603           for (l=0; l<s; l++) {
1604             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1605           }
1606         }
1607         if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1608           for (l=0; l<s; l++) {
1609             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (m + i - s - l)*nc + ofill[k];
1610           }
1611         }
1612         if (dfill) {
1613           for (k=dfill[j]; k<dfill[j+1]; k++) {
1614             cols[cnt++] = i*nc + dfill[k];
1615           }
1616         } else {
1617           for (k=0; k<nc; k++) {
1618             cols[cnt++] = i*nc + k;
1619           }
1620         }
1621         for (l=0; l<s; l++) {
1622           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1623         }
1624         ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1625         row++;
1626       }
1627     }
1628     for (i=xs+s; i<xs+nx-s; i++) {
1629       for (j=0; j<nc; j++) {
1630         cnt = 0;
1631         for (l=0; l<s; l++) {
1632           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1633         }
1634         if (dfill) {
1635           for (k=dfill[j]; k<dfill[j+1]; k++) {
1636             cols[cnt++] = i*nc + dfill[k];
1637           }
1638         } else {
1639           for (k=0; k<nc; k++) {
1640             cols[cnt++] = i*nc + k;
1641           }
1642         }
1643         for (l=0; l<s; l++) {
1644           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1645         }
1646         ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1647         row++;
1648       }
1649     }
1650     /* coupling with process to the right */
1651     for (i=xs+nx-s; i<xs+nx; i++) {
1652       for (j=0; j<nc; j++) {
1653         cnt = 0;
1654         for (l=0; l<s; l++) {
1655           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1656         }
1657         if (dfill) {
1658           for (k=dfill[j]; k<dfill[j+1]; k++) {
1659             cols[cnt++] = i*nc + dfill[k];
1660           }
1661         } else {
1662           for (k=0; k<nc; k++) {
1663             cols[cnt++] = i*nc + k;
1664           }
1665         }
1666         if (rank < size-1) {
1667           for (l=0; l<s; l++) {
1668             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1669           }
1670         }
1671         if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1672           for (l=0; l<s; l++) {
1673             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s - l - m + 2)*nc + ofill[k];
1674           }
1675         }
1676         ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1677         row++;
1678       }
1679     }
1680     ierr = PetscFree(cols);CHKERRQ(ierr);
1681     /* do not copy values to GPU since they are all zero and not yet needed there */
1682     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1683     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1684     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1685     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1686     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1687   }
1688   PetscFunctionReturn(0);
1689 }
1690 
1691 /* ---------------------------------------------------------------------------------*/
1692 
1693 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J,PetscBool isIS)
1694 {
1695   PetscErrorCode         ierr;
1696   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1697   PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
1698   PetscInt               istart,iend;
1699   DMBoundaryType         bx;
1700   ISLocalToGlobalMapping ltog,mltog;
1701 
1702   PetscFunctionBegin;
1703   /*
1704          nc - number of components per grid point
1705          col - number of colors needed in one direction for single component problem
1706 
1707   */
1708   ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr);
1709   if (!isIS && bx == DM_BOUNDARY_NONE) {
1710     ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr);
1711   }
1712   col  = 2*s + 1;
1713 
1714   ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr);
1715   ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr);
1716 
1717   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1718   ierr = MatSeqAIJSetPreallocation(J,col*nc,NULL);CHKERRQ(ierr);
1719   ierr = MatMPIAIJSetPreallocation(J,col*nc,NULL,col*nc,NULL);CHKERRQ(ierr);
1720 
1721   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1722   ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
1723   if (!mltog) {
1724     ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1725   }
1726 
1727   /*
1728     For each node in the grid: we get the neighbors in the local (on processor ordering
1729     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1730     PETSc ordering.
1731   */
1732   if (!da->prealloc_only) {
1733     ierr = PetscMalloc2(nc,&rows,col*nc*nc,&cols);CHKERRQ(ierr);
1734     for (i=xs; i<xs+nx; i++) {
1735       istart = PetscMax(-s,gxs - i);
1736       iend   = PetscMin(s,gxs + gnx - i - 1);
1737       slot   = i - gxs;
1738 
1739       cnt = 0;
1740       for (i1=istart; i1<iend+1; i1++) {
1741         cols[cnt++] = nc*(slot + i1);
1742         for (l=1; l<nc; l++) {
1743           cols[cnt] = 1 + cols[cnt-1];cnt++;
1744         }
1745       }
1746       rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
1747       ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1748     }
1749     /* do not copy values to GPU since they are all zero and not yet needed there */
1750     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1751     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1752     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1753     if (!isIS && bx == DM_BOUNDARY_NONE) {
1754       ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
1755     }
1756     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1757     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1758     ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1759   }
1760   PetscFunctionReturn(0);
1761 }
1762 
1763 /* ---------------------------------------------------------------------------------*/
1764 
1765 PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da,Mat J,PetscBool isIS)
1766 {
1767   PetscErrorCode         ierr;
1768   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1769   PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
1770   PetscInt               istart,iend;
1771   DMBoundaryType         bx;
1772   ISLocalToGlobalMapping ltog,mltog;
1773 
1774   PetscFunctionBegin;
1775   /*
1776          nc - number of components per grid point
1777          col - number of colors needed in one direction for single component problem
1778   */
1779   ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr);
1780   col  = 2*s + 1;
1781 
1782   ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr);
1783   ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr);
1784 
1785   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1786   ierr = MatSeqAIJSetTotalPreallocation(J,nx*nc*col*nc);CHKERRQ(ierr);
1787 
1788   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1789   ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
1790   if (!mltog) {
1791     ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1792   }
1793 
1794   /*
1795     For each node in the grid: we get the neighbors in the local (on processor ordering
1796     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1797     PETSc ordering.
1798   */
1799   if (!da->prealloc_only) {
1800     ierr = PetscMalloc2(nc,&rows,col*nc*nc,&cols);CHKERRQ(ierr);
1801     for (i=xs; i<xs+nx; i++) {
1802       istart = PetscMax(-s,gxs - i);
1803       iend   = PetscMin(s,gxs + gnx - i - 1);
1804       slot   = i - gxs;
1805 
1806       cnt = 0;
1807       for (i1=istart; i1<iend+1; i1++) {
1808         cols[cnt++] = nc*(slot + i1);
1809         for (l=1; l<nc; l++) {
1810           cols[cnt] = 1 + cols[cnt-1];cnt++;
1811         }
1812       }
1813       rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
1814       ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1815     }
1816     /* do not copy values to GPU since they are all zero and not yet needed there */
1817     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1818     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1819     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1820     if (!isIS && bx == DM_BOUNDARY_NONE) {
1821       ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
1822     }
1823     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1824     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1825     ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1826   }
1827   ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
1828   PetscFunctionReturn(0);
1829 }
1830 
1831 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
1832 {
1833   PetscErrorCode         ierr;
1834   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1835   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1836   PetscInt               istart,iend,jstart,jend,ii,jj;
1837   MPI_Comm               comm;
1838   PetscScalar            *values;
1839   DMBoundaryType         bx,by;
1840   DMDAStencilType        st;
1841   ISLocalToGlobalMapping ltog;
1842 
1843   PetscFunctionBegin;
1844   /*
1845      nc - number of components per grid point
1846      col - number of colors needed in one direction for single component problem
1847   */
1848   ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
1849   col  = 2*s + 1;
1850 
1851   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
1852   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
1853   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1854 
1855   ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr);
1856 
1857   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1858 
1859   /* determine the matrix preallocation information */
1860   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1861   for (i=xs; i<xs+nx; i++) {
1862     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1863     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1864     for (j=ys; j<ys+ny; j++) {
1865       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1866       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1867       slot   = i - gxs + gnx*(j - gys);
1868 
1869       /* Find block columns in block row */
1870       cnt = 0;
1871       for (ii=istart; ii<iend+1; ii++) {
1872         for (jj=jstart; jj<jend+1; jj++) {
1873           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1874             cols[cnt++] = slot + ii + gnx*jj;
1875           }
1876         }
1877       }
1878       ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1879     }
1880   }
1881   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1882   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1883   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1884 
1885   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1886 
1887   /*
1888     For each node in the grid: we get the neighbors in the local (on processor ordering
1889     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1890     PETSc ordering.
1891   */
1892   if (!da->prealloc_only) {
1893     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
1894     for (i=xs; i<xs+nx; i++) {
1895       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1896       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1897       for (j=ys; j<ys+ny; j++) {
1898         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1899         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1900         slot = i - gxs + gnx*(j - gys);
1901         cnt  = 0;
1902         for (ii=istart; ii<iend+1; ii++) {
1903           for (jj=jstart; jj<jend+1; jj++) {
1904             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1905               cols[cnt++] = slot + ii + gnx*jj;
1906             }
1907           }
1908         }
1909         ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1910       }
1911     }
1912     ierr = PetscFree(values);CHKERRQ(ierr);
1913     /* do not copy values to GPU since they are all zero and not yet needed there */
1914     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1915     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1916     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1917     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1918     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1919   }
1920   ierr = PetscFree(cols);CHKERRQ(ierr);
1921   PetscFunctionReturn(0);
1922 }
1923 
1924 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
1925 {
1926   PetscErrorCode         ierr;
1927   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1928   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1929   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1930   MPI_Comm               comm;
1931   PetscScalar            *values;
1932   DMBoundaryType         bx,by,bz;
1933   DMDAStencilType        st;
1934   ISLocalToGlobalMapping ltog;
1935 
1936   PetscFunctionBegin;
1937   /*
1938          nc - number of components per grid point
1939          col - number of colors needed in one direction for single component problem
1940 
1941   */
1942   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1943   col  = 2*s + 1;
1944 
1945   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1946   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1947   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1948 
1949   ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr);
1950 
1951   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1952 
1953   /* determine the matrix preallocation information */
1954   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1955   for (i=xs; i<xs+nx; i++) {
1956     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1957     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1958     for (j=ys; j<ys+ny; j++) {
1959       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1960       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1961       for (k=zs; k<zs+nz; k++) {
1962         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1963         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1964 
1965         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1966 
1967         /* Find block columns in block row */
1968         cnt = 0;
1969         for (ii=istart; ii<iend+1; ii++) {
1970           for (jj=jstart; jj<jend+1; jj++) {
1971             for (kk=kstart; kk<kend+1; kk++) {
1972               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1973                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1974               }
1975             }
1976           }
1977         }
1978         ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1979       }
1980     }
1981   }
1982   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1983   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1984   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1985 
1986   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1987 
1988   /*
1989     For each node in the grid: we get the neighbors in the local (on processor ordering
1990     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1991     PETSc ordering.
1992   */
1993   if (!da->prealloc_only) {
1994     ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr);
1995     for (i=xs; i<xs+nx; i++) {
1996       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1997       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1998       for (j=ys; j<ys+ny; j++) {
1999         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2000         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2001         for (k=zs; k<zs+nz; k++) {
2002           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2003           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2004 
2005           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2006 
2007           cnt = 0;
2008           for (ii=istart; ii<iend+1; ii++) {
2009             for (jj=jstart; jj<jend+1; jj++) {
2010               for (kk=kstart; kk<kend+1; kk++) {
2011                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2012                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
2013                 }
2014               }
2015             }
2016           }
2017           ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2018         }
2019       }
2020     }
2021     ierr = PetscFree(values);CHKERRQ(ierr);
2022     /* do not copy values to GPU since they are all zero and not yet needed there */
2023     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
2024     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2025     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2026     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
2027     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2028   }
2029   ierr = PetscFree(cols);CHKERRQ(ierr);
2030   PetscFunctionReturn(0);
2031 }
2032 
2033 /*
2034   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
2035   identify in the local ordering with periodic domain.
2036 */
2037 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[])
2038 {
2039   PetscErrorCode ierr;
2040   PetscInt       i,n;
2041 
2042   PetscFunctionBegin;
2043   ierr = ISLocalToGlobalMappingApplyBlock(ltog,1,row,row);CHKERRQ(ierr);
2044   ierr = ISLocalToGlobalMappingApplyBlock(ltog,*cnt,col,col);CHKERRQ(ierr);
2045   for (i=0,n=0; i<*cnt; i++) {
2046     if (col[i] >= *row) col[n++] = col[i];
2047   }
2048   *cnt = n;
2049   PetscFunctionReturn(0);
2050 }
2051 
2052 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J)
2053 {
2054   PetscErrorCode         ierr;
2055   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2056   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
2057   PetscInt               istart,iend,jstart,jend,ii,jj;
2058   MPI_Comm               comm;
2059   PetscScalar            *values;
2060   DMBoundaryType         bx,by;
2061   DMDAStencilType        st;
2062   ISLocalToGlobalMapping ltog;
2063 
2064   PetscFunctionBegin;
2065   /*
2066      nc - number of components per grid point
2067      col - number of colors needed in one direction for single component problem
2068   */
2069   ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
2070   col  = 2*s + 1;
2071 
2072   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
2073   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
2074   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
2075 
2076   ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr);
2077 
2078   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
2079 
2080   /* determine the matrix preallocation information */
2081   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
2082   for (i=xs; i<xs+nx; i++) {
2083     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2084     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2085     for (j=ys; j<ys+ny; j++) {
2086       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2087       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2088       slot   = i - gxs + gnx*(j - gys);
2089 
2090       /* Find block columns in block row */
2091       cnt = 0;
2092       for (ii=istart; ii<iend+1; ii++) {
2093         for (jj=jstart; jj<jend+1; jj++) {
2094           if (st == DMDA_STENCIL_BOX || !ii || !jj) {
2095             cols[cnt++] = slot + ii + gnx*jj;
2096           }
2097         }
2098       }
2099       ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2100       ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
2101     }
2102   }
2103   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
2104   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
2105   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2106 
2107   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
2108 
2109   /*
2110     For each node in the grid: we get the neighbors in the local (on processor ordering
2111     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2112     PETSc ordering.
2113   */
2114   if (!da->prealloc_only) {
2115     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
2116     for (i=xs; i<xs+nx; i++) {
2117       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2118       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2119       for (j=ys; j<ys+ny; j++) {
2120         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2121         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2122         slot   = i - gxs + gnx*(j - gys);
2123 
2124         /* Find block columns in block row */
2125         cnt = 0;
2126         for (ii=istart; ii<iend+1; ii++) {
2127           for (jj=jstart; jj<jend+1; jj++) {
2128             if (st == DMDA_STENCIL_BOX || !ii || !jj) {
2129               cols[cnt++] = slot + ii + gnx*jj;
2130             }
2131           }
2132         }
2133         ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2134         ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2135       }
2136     }
2137     ierr = PetscFree(values);CHKERRQ(ierr);
2138     /* do not copy values to GPU since they are all zero and not yet needed there */
2139     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
2140     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2141     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2142     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
2143     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2144   }
2145   ierr = PetscFree(cols);CHKERRQ(ierr);
2146   PetscFunctionReturn(0);
2147 }
2148 
2149 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J)
2150 {
2151   PetscErrorCode         ierr;
2152   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2153   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
2154   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
2155   MPI_Comm               comm;
2156   PetscScalar            *values;
2157   DMBoundaryType         bx,by,bz;
2158   DMDAStencilType        st;
2159   ISLocalToGlobalMapping ltog;
2160 
2161   PetscFunctionBegin;
2162   /*
2163      nc - number of components per grid point
2164      col - number of colors needed in one direction for single component problem
2165   */
2166   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
2167   col  = 2*s + 1;
2168 
2169   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
2170   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
2171   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
2172 
2173   /* create the matrix */
2174   ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr);
2175 
2176   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
2177 
2178   /* determine the matrix preallocation information */
2179   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
2180   for (i=xs; i<xs+nx; i++) {
2181     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2182     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2183     for (j=ys; j<ys+ny; j++) {
2184       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2185       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2186       for (k=zs; k<zs+nz; k++) {
2187         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2188         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2189 
2190         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2191 
2192         /* Find block columns in block row */
2193         cnt = 0;
2194         for (ii=istart; ii<iend+1; ii++) {
2195           for (jj=jstart; jj<jend+1; jj++) {
2196             for (kk=kstart; kk<kend+1; kk++) {
2197               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
2198                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
2199               }
2200             }
2201           }
2202         }
2203         ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2204         ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
2205       }
2206     }
2207   }
2208   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
2209   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
2210   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2211 
2212   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
2213 
2214   /*
2215     For each node in the grid: we get the neighbors in the local (on processor ordering
2216     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2217     PETSc ordering.
2218   */
2219   if (!da->prealloc_only) {
2220     ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr);
2221     for (i=xs; i<xs+nx; i++) {
2222       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2223       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2224       for (j=ys; j<ys+ny; j++) {
2225         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2226         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2227         for (k=zs; k<zs+nz; k++) {
2228           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2229           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2230 
2231           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2232 
2233           cnt = 0;
2234           for (ii=istart; ii<iend+1; ii++) {
2235             for (jj=jstart; jj<jend+1; jj++) {
2236               for (kk=kstart; kk<kend+1; kk++) {
2237                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
2238                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
2239                 }
2240               }
2241             }
2242           }
2243           ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2244           ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2245         }
2246       }
2247     }
2248     ierr = PetscFree(values);CHKERRQ(ierr);
2249     /* do not copy values to GPU since they are all zero and not yet needed there */
2250     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
2251     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2252     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2253     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
2254     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2255   }
2256   ierr = PetscFree(cols);CHKERRQ(ierr);
2257   PetscFunctionReturn(0);
2258 }
2259 
2260 /* ---------------------------------------------------------------------------------*/
2261 
2262 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J)
2263 {
2264   PetscErrorCode         ierr;
2265   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2266   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt, maxcnt = 0,l,p,*dnz,*onz;
2267   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
2268   DM_DA                  *dd = (DM_DA*)da->data;
2269   PetscInt               ifill_col,*dfill = dd->dfill,*ofill = dd->ofill;
2270   MPI_Comm               comm;
2271   PetscScalar            *values;
2272   DMBoundaryType         bx,by,bz;
2273   ISLocalToGlobalMapping ltog;
2274   DMDAStencilType        st;
2275   PetscBool              removedups = PETSC_FALSE;
2276 
2277   PetscFunctionBegin;
2278   /*
2279          nc - number of components per grid point
2280          col - number of colors needed in one direction for single component problem
2281 
2282   */
2283   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
2284   col  = 2*s + 1;
2285   if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
2286                  by 2*stencil_width + 1\n");
2287   if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
2288                  by 2*stencil_width + 1\n");
2289   if (bz == DM_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
2290                  by 2*stencil_width + 1\n");
2291 
2292   /*
2293        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
2294        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
2295   */
2296   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
2297   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
2298   if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;
2299 
2300   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
2301   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
2302   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
2303 
2304   ierr = PetscMalloc1(col*col*col*nc,&cols);CHKERRQ(ierr);
2305   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
2306 
2307   /* determine the matrix preallocation information */
2308   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
2309 
2310   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
2311   for (i=xs; i<xs+nx; i++) {
2312     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2313     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2314     for (j=ys; j<ys+ny; j++) {
2315       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2316       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2317       for (k=zs; k<zs+nz; k++) {
2318         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2319         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2320 
2321         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2322 
2323         for (l=0; l<nc; l++) {
2324           cnt = 0;
2325           for (ii=istart; ii<iend+1; ii++) {
2326             for (jj=jstart; jj<jend+1; jj++) {
2327               for (kk=kstart; kk<kend+1; kk++) {
2328                 if (ii || jj || kk) {
2329                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2330                     for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2331                   }
2332                 } else {
2333                   if (dfill) {
2334                     for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2335                   } else {
2336                     for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2337                   }
2338                 }
2339               }
2340             }
2341           }
2342           row  = l + nc*(slot);
2343           maxcnt = PetscMax(maxcnt,cnt);
2344           if (removedups) {
2345             ierr = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
2346           } else {
2347             ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
2348           }
2349         }
2350       }
2351     }
2352   }
2353   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
2354   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
2355   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2356   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
2357 
2358   /*
2359     For each node in the grid: we get the neighbors in the local (on processor ordering
2360     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2361     PETSc ordering.
2362   */
2363   if (!da->prealloc_only) {
2364     ierr = PetscCalloc1(maxcnt,&values);CHKERRQ(ierr);
2365     for (i=xs; i<xs+nx; i++) {
2366       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2367       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2368       for (j=ys; j<ys+ny; j++) {
2369         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2370         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2371         for (k=zs; k<zs+nz; k++) {
2372           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2373           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2374 
2375           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2376 
2377           for (l=0; l<nc; l++) {
2378             cnt = 0;
2379             for (ii=istart; ii<iend+1; ii++) {
2380               for (jj=jstart; jj<jend+1; jj++) {
2381                 for (kk=kstart; kk<kend+1; kk++) {
2382                   if (ii || jj || kk) {
2383                     if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2384                       for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2385                     }
2386                   } else {
2387                     if (dfill) {
2388                       for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2389                     } else {
2390                       for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2391                     }
2392                   }
2393                 }
2394               }
2395             }
2396             row  = l + nc*(slot);
2397             ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2398           }
2399         }
2400       }
2401     }
2402     ierr = PetscFree(values);CHKERRQ(ierr);
2403     /* do not copy values to GPU since they are all zero and not yet needed there */
2404     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
2405     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2406     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2407     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
2408     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2409   }
2410   ierr = PetscFree(cols);CHKERRQ(ierr);
2411   PetscFunctionReturn(0);
2412 }
2413