xref: /petsc/src/dm/impls/da/fdda.c (revision 8ebe3e4e9e00d86ece2e9fcd0cc84910b0ad437c)
1 
2 #include <petsc/private/dmdaimpl.h> /*I      "petscdmda.h"     I*/
3 #include <petscmat.h>
4 
5 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring*);
6 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring*);
7 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring*);
8 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring*);
9 
10 /*
11    For ghost i that may be negative or greater than the upper bound this
12   maps it into the 0:m-1 range using periodicity
13 */
14 #define SetInRange(i,m) ((i < 0) ? m+i : ((i >= m) ? i-m : i))
15 
16 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill,PetscInt w,PetscInt **rfill)
17 {
18   PetscErrorCode ierr;
19   PetscInt       i,j,nz,*fill;
20 
21   PetscFunctionBegin;
22   if (!dfill) PetscFunctionReturn(0);
23 
24   /* count number nonzeros */
25   nz = 0;
26   for (i=0; i<w; i++) {
27     for (j=0; j<w; j++) {
28       if (dfill[w*i+j]) nz++;
29     }
30   }
31   ierr = PetscMalloc1(nz + w + 1,&fill);CHKERRQ(ierr);
32   /* construct modified CSR storage of nonzero structure */
33   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
34    so fill[1] - fill[0] gives number of nonzeros in first row etc */
35   nz = w + 1;
36   for (i=0; i<w; i++) {
37     fill[i] = nz;
38     for (j=0; j<w; j++) {
39       if (dfill[w*i+j]) {
40         fill[nz] = j;
41         nz++;
42       }
43     }
44   }
45   fill[w] = nz;
46 
47   *rfill = fill;
48   PetscFunctionReturn(0);
49 }
50 
51 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse,PetscInt w,PetscInt **rfill)
52 {
53   PetscErrorCode ierr;
54   PetscInt       nz;
55 
56   PetscFunctionBegin;
57   if (!dfillsparse) PetscFunctionReturn(0);
58 
59   /* Determine number of non-zeros */
60   nz = (dfillsparse[w] - w - 1);
61 
62   /* Allocate space for our copy of the given sparse matrix representation. */
63   ierr = PetscMalloc1(nz + w + 1,rfill);CHKERRQ(ierr);
64   ierr = PetscArraycpy(*rfill,dfillsparse,nz+w+1);CHKERRQ(ierr);
65   PetscFunctionReturn(0);
66 }
67 
68 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
69 {
70   PetscErrorCode ierr;
71   PetscInt       i,k,cnt = 1;
72 
73   PetscFunctionBegin;
74 
75   /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of
76    columns to the left with any nonzeros in them plus 1 */
77   ierr = PetscCalloc1(dd->w,&dd->ofillcols);CHKERRQ(ierr);
78   for (i=0; i<dd->w; i++) {
79     for (k=dd->ofill[i]; k<dd->ofill[i+1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
80   }
81   for (i=0; i<dd->w; i++) {
82     if (dd->ofillcols[i]) {
83       dd->ofillcols[i] = cnt++;
84     }
85   }
86   PetscFunctionReturn(0);
87 }
88 
89 /*@
90     DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
91     of the matrix returned by DMCreateMatrix().
92 
93     Logically Collective on da
94 
95     Input Parameters:
96 +   da - the distributed array
97 .   dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
98 -   ofill - the fill pattern in the off-diagonal blocks
99 
100     Level: developer
101 
102     Notes:
103     This only makes sense when you are doing multicomponent problems but using the
104        MPIAIJ matrix format
105 
106            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
107        representing coupling and 0 entries for missing coupling. For example
108 $             dfill[9] = {1, 0, 0,
109 $                         1, 1, 0,
110 $                         0, 1, 1}
111        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
112        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
113        diagonal block).
114 
115      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
116      can be represented in the dfill, ofill format
117 
118    Contributed by Glenn Hammond
119 
120 .seealso DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()
121 
122 @*/
123 PetscErrorCode  DMDASetBlockFills(DM da,const PetscInt *dfill,const PetscInt *ofill)
124 {
125   DM_DA          *dd = (DM_DA*)da->data;
126   PetscErrorCode ierr;
127 
128   PetscFunctionBegin;
129   /* save the given dfill and ofill information */
130   ierr = DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);CHKERRQ(ierr);
131   ierr = DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);CHKERRQ(ierr);
132 
133   /* count nonzeros in ofill columns */
134   ierr = DMDASetBlockFills_Private2(dd);CHKERRQ(ierr);
135 
136   PetscFunctionReturn(0);
137 }
138 
139 /*@
140     DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
141     of the matrix returned by DMCreateMatrix(), using sparse representations
142     of fill patterns.
143 
144     Logically Collective on da
145 
146     Input Parameters:
147 +   da - the distributed array
148 .   dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block)
149 -   ofill - the sparse fill pattern in the off-diagonal blocks
150 
151     Level: developer
152 
153     Notes: This only makes sense when you are doing multicomponent problems but using the
154        MPIAIJ matrix format
155 
156            The format for dfill and ofill is a sparse representation of a
157            dof-by-dof matrix with 1 entries representing coupling and 0 entries
158            for missing coupling.  The sparse representation is a 1 dimensional
159            array of length nz + dof + 1, where nz is the number of non-zeros in
160            the matrix.  The first dof entries in the array give the
161            starting array indices of each row's items in the rest of the array,
162            the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
163            and the remaining nz items give the column indices of each of
164            the 1s within the logical 2D matrix.  Each row's items within
165            the array are the column indices of the 1s within that row
166            of the 2D matrix.  PETSc developers may recognize that this is the
167            same format as that computed by the DMDASetBlockFills_Private()
168            function from a dense 2D matrix representation.
169 
170      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
171      can be represented in the dfill, ofill format
172 
173    Contributed by Philip C. Roth
174 
175 .seealso DMDASetBlockFills(), DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()
176 
177 @*/
178 PetscErrorCode  DMDASetBlockFillsSparse(DM da,const PetscInt *dfillsparse,const PetscInt *ofillsparse)
179 {
180   DM_DA          *dd = (DM_DA*)da->data;
181   PetscErrorCode ierr;
182 
183   PetscFunctionBegin;
184   /* save the given dfill and ofill information */
185   ierr = DMDASetBlockFillsSparse_Private(dfillsparse,dd->w,&dd->dfill);CHKERRQ(ierr);
186   ierr = DMDASetBlockFillsSparse_Private(ofillsparse,dd->w,&dd->ofill);CHKERRQ(ierr);
187 
188   /* count nonzeros in ofill columns */
189   ierr = DMDASetBlockFills_Private2(dd);CHKERRQ(ierr);
190 
191   PetscFunctionReturn(0);
192 }
193 
194 PetscErrorCode  DMCreateColoring_DA(DM da,ISColoringType ctype,ISColoring *coloring)
195 {
196   PetscErrorCode   ierr;
197   PetscInt         dim,m,n,p,nc;
198   DMBoundaryType   bx,by,bz;
199   MPI_Comm         comm;
200   PetscMPIInt      size;
201   PetscBool        isBAIJ;
202   DM_DA            *dd = (DM_DA*)da->data;
203 
204   PetscFunctionBegin;
205   /*
206                                   m
207           ------------------------------------------------------
208          |                                                     |
209          |                                                     |
210          |               ----------------------                |
211          |               |                    |                |
212       n  |           yn  |                    |                |
213          |               |                    |                |
214          |               .---------------------                |
215          |             (xs,ys)     xn                          |
216          |            .                                        |
217          |         (gxs,gys)                                   |
218          |                                                     |
219           -----------------------------------------------------
220   */
221 
222   /*
223          nc - number of components per grid point
224          col - number of colors needed in one direction for single component problem
225 
226   */
227   ierr = DMDAGetInfo(da,&dim,NULL,NULL,NULL,&m,&n,&p,&nc,NULL,&bx,&by,&bz,NULL);CHKERRQ(ierr);
228 
229   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
230   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
231   if (ctype == IS_COLORING_LOCAL) {
232     if (size == 1) {
233       ctype = IS_COLORING_GLOBAL;
234     } else if (dim > 1) {
235       if ((m==1 && bx == DM_BOUNDARY_PERIODIC) || (n==1 && by == DM_BOUNDARY_PERIODIC) || (p==1 && bz == DM_BOUNDARY_PERIODIC)) {
236         SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain  on the same process");
237       }
238     }
239   }
240 
241   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
242      matrices is for the blocks, not the individual matrix elements  */
243   ierr = PetscStrbeginswith(da->mattype,MATBAIJ,&isBAIJ);CHKERRQ(ierr);
244   if (!isBAIJ) {ierr = PetscStrbeginswith(da->mattype,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);}
245   if (!isBAIJ) {ierr = PetscStrbeginswith(da->mattype,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);}
246   if (isBAIJ) {
247     dd->w  = 1;
248     dd->xs = dd->xs/nc;
249     dd->xe = dd->xe/nc;
250     dd->Xs = dd->Xs/nc;
251     dd->Xe = dd->Xe/nc;
252   }
253 
254   /*
255      We do not provide a getcoloring function in the DMDA operations because
256    the basic DMDA does not know about matrices. We think of DMDA as being
257    more low-level then matrices.
258   */
259   if (dim == 1) {
260     ierr = DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
261   } else if (dim == 2) {
262     ierr = DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
263   } else if (dim == 3) {
264     ierr = DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
265   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
266   if (isBAIJ) {
267     dd->w  = nc;
268     dd->xs = dd->xs*nc;
269     dd->xe = dd->xe*nc;
270     dd->Xs = dd->Xs*nc;
271     dd->Xe = dd->Xe*nc;
272   }
273   PetscFunctionReturn(0);
274 }
275 
276 /* ---------------------------------------------------------------------------------*/
277 
278 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
279 {
280   PetscErrorCode  ierr;
281   PetscInt        xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
282   PetscInt        ncolors;
283   MPI_Comm        comm;
284   DMBoundaryType  bx,by;
285   DMDAStencilType st;
286   ISColoringValue *colors;
287   DM_DA           *dd = (DM_DA*)da->data;
288 
289   PetscFunctionBegin;
290   /*
291          nc - number of components per grid point
292          col - number of colors needed in one direction for single component problem
293 
294   */
295   ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,&M,&N,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
296   col  = 2*s + 1;
297   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
298   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
299   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
300 
301   /* special case as taught to us by Paul Hovland */
302   if (st == DMDA_STENCIL_STAR && s == 1) {
303     ierr = DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
304   } else {
305     if (ctype == IS_COLORING_GLOBAL) {
306       if (!dd->localcoloring) {
307         ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr);
308         ii   = 0;
309         for (j=ys; j<ys+ny; j++) {
310           for (i=xs; i<xs+nx; i++) {
311             for (k=0; k<nc; k++) {
312               colors[ii++] = k + nc*((i % col) + col*(j % col));
313             }
314           }
315         }
316         ncolors = nc + nc*(col-1 + col*(col-1));
317         ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
318       }
319       *coloring = dd->localcoloring;
320     } else if (ctype == IS_COLORING_LOCAL) {
321       if (!dd->ghostedcoloring) {
322         ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr);
323         ii   = 0;
324         for (j=gys; j<gys+gny; j++) {
325           for (i=gxs; i<gxs+gnx; i++) {
326             for (k=0; k<nc; k++) {
327               /* the complicated stuff is to handle periodic boundaries */
328               colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
329             }
330           }
331         }
332         ncolors = nc + nc*(col - 1 + col*(col-1));
333         ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
334         /* PetscIntView(ncolors,(PetscInt*)colors,0); */
335 
336         ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
337       }
338       *coloring = dd->ghostedcoloring;
339     } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
340   }
341   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
342   PetscFunctionReturn(0);
343 }
344 
345 /* ---------------------------------------------------------------------------------*/
346 
347 PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
348 {
349   PetscErrorCode  ierr;
350   PetscInt        xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
351   PetscInt        ncolors;
352   MPI_Comm        comm;
353   DMBoundaryType  bx,by,bz;
354   DMDAStencilType st;
355   ISColoringValue *colors;
356   DM_DA           *dd = (DM_DA*)da->data;
357 
358   PetscFunctionBegin;
359   /*
360          nc - number of components per grid point
361          col - number of colors needed in one direction for single component problem
362 
363   */
364   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
365   col  = 2*s + 1;
366   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
367   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
368   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
369 
370   /* create the coloring */
371   if (ctype == IS_COLORING_GLOBAL) {
372     if (!dd->localcoloring) {
373       ierr = PetscMalloc1(nc*nx*ny*nz,&colors);CHKERRQ(ierr);
374       ii   = 0;
375       for (k=zs; k<zs+nz; k++) {
376         for (j=ys; j<ys+ny; j++) {
377           for (i=xs; i<xs+nx; i++) {
378             for (l=0; l<nc; l++) {
379               colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
380             }
381           }
382         }
383       }
384       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
385       ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
386     }
387     *coloring = dd->localcoloring;
388   } else if (ctype == IS_COLORING_LOCAL) {
389     if (!dd->ghostedcoloring) {
390       ierr = PetscMalloc1(nc*gnx*gny*gnz,&colors);CHKERRQ(ierr);
391       ii   = 0;
392       for (k=gzs; k<gzs+gnz; k++) {
393         for (j=gys; j<gys+gny; j++) {
394           for (i=gxs; i<gxs+gnx; i++) {
395             for (l=0; l<nc; l++) {
396               /* the complicated stuff is to handle periodic boundaries */
397               colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
398             }
399           }
400         }
401       }
402       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
403       ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
404       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
405     }
406     *coloring = dd->ghostedcoloring;
407   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
408   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
409   PetscFunctionReturn(0);
410 }
411 
412 /* ---------------------------------------------------------------------------------*/
413 
414 PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
415 {
416   PetscErrorCode  ierr;
417   PetscInt        xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
418   PetscInt        ncolors;
419   MPI_Comm        comm;
420   DMBoundaryType  bx;
421   ISColoringValue *colors;
422   DM_DA           *dd = (DM_DA*)da->data;
423 
424   PetscFunctionBegin;
425   /*
426          nc - number of components per grid point
427          col - number of colors needed in one direction for single component problem
428 
429   */
430   ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,&M,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr);
431   col  = 2*s + 1;
432   ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr);
433   ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr);
434   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
435 
436   /* create the coloring */
437   if (ctype == IS_COLORING_GLOBAL) {
438     if (!dd->localcoloring) {
439       ierr = PetscMalloc1(nc*nx,&colors);CHKERRQ(ierr);
440       if (dd->ofillcols) {
441         PetscInt tc = 0;
442         for (i=0; i<nc; i++) tc += (PetscInt) (dd->ofillcols[i] > 0);
443         i1 = 0;
444         for (i=xs; i<xs+nx; i++) {
445           for (l=0; l<nc; l++) {
446             if (dd->ofillcols[l] && (i % col)) {
447               colors[i1++] =  nc - 1 + tc*((i % col) - 1) + dd->ofillcols[l];
448             } else {
449               colors[i1++] = l;
450             }
451           }
452         }
453         ncolors = nc + 2*s*tc;
454       } else {
455         i1 = 0;
456         for (i=xs; i<xs+nx; i++) {
457           for (l=0; l<nc; l++) {
458             colors[i1++] = l + nc*(i % col);
459           }
460         }
461         ncolors = nc + nc*(col-1);
462       }
463       ierr = ISColoringCreate(comm,ncolors,nc*nx,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
464     }
465     *coloring = dd->localcoloring;
466   } else if (ctype == IS_COLORING_LOCAL) {
467     if (!dd->ghostedcoloring) {
468       ierr = PetscMalloc1(nc*gnx,&colors);CHKERRQ(ierr);
469       i1   = 0;
470       for (i=gxs; i<gxs+gnx; i++) {
471         for (l=0; l<nc; l++) {
472           /* the complicated stuff is to handle periodic boundaries */
473           colors[i1++] = l + nc*(SetInRange(i,m) % col);
474         }
475       }
476       ncolors = nc + nc*(col-1);
477       ierr    = ISColoringCreate(comm,ncolors,nc*gnx,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
478       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
479     }
480     *coloring = dd->ghostedcoloring;
481   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
482   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
483   PetscFunctionReturn(0);
484 }
485 
486 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
487 {
488   PetscErrorCode  ierr;
489   PetscInt        xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
490   PetscInt        ncolors;
491   MPI_Comm        comm;
492   DMBoundaryType  bx,by;
493   ISColoringValue *colors;
494   DM_DA           *dd = (DM_DA*)da->data;
495 
496   PetscFunctionBegin;
497   /*
498          nc - number of components per grid point
499          col - number of colors needed in one direction for single component problem
500 
501   */
502   ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,NULL);CHKERRQ(ierr);
503   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
504   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
505   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
506   /* create the coloring */
507   if (ctype == IS_COLORING_GLOBAL) {
508     if (!dd->localcoloring) {
509       ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr);
510       ii   = 0;
511       for (j=ys; j<ys+ny; j++) {
512         for (i=xs; i<xs+nx; i++) {
513           for (k=0; k<nc; k++) {
514             colors[ii++] = k + nc*((3*j+i) % 5);
515           }
516         }
517       }
518       ncolors = 5*nc;
519       ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
520     }
521     *coloring = dd->localcoloring;
522   } else if (ctype == IS_COLORING_LOCAL) {
523     if (!dd->ghostedcoloring) {
524       ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr);
525       ii = 0;
526       for (j=gys; j<gys+gny; j++) {
527         for (i=gxs; i<gxs+gnx; i++) {
528           for (k=0; k<nc; k++) {
529             colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
530           }
531         }
532       }
533       ncolors = 5*nc;
534       ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
535       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
536     }
537     *coloring = dd->ghostedcoloring;
538   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
539   PetscFunctionReturn(0);
540 }
541 
542 /* =========================================================================== */
543 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat,PetscBool);
544 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM,Mat);
545 extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM,Mat,PetscBool);
546 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat,PetscBool);
547 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat);
548 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat,PetscBool);
549 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat);
550 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat);
551 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat);
552 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat);
553 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat);
554 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM,Mat);
555 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM,Mat);
556 extern PetscErrorCode DMCreateMatrix_DA_IS(DM,Mat);
557 
558 /*@C
559    MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix
560 
561    Logically Collective on mat
562 
563    Input Parameters:
564 +  mat - the matrix
565 -  da - the da
566 
567    Level: intermediate
568 
569 @*/
570 PetscErrorCode MatSetupDM(Mat mat,DM da)
571 {
572   PetscErrorCode ierr;
573 
574   PetscFunctionBegin;
575   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
576   PetscValidHeaderSpecificType(da,DM_CLASSID,2,DMDA);
577   ierr = PetscTryMethod(mat,"MatSetupDM_C",(Mat,DM),(mat,da));CHKERRQ(ierr);
578   PetscFunctionReturn(0);
579 }
580 
581 PetscErrorCode  MatView_MPI_DA(Mat A,PetscViewer viewer)
582 {
583   DM                da;
584   PetscErrorCode    ierr;
585   const char        *prefix;
586   Mat               Anatural;
587   AO                ao;
588   PetscInt          rstart,rend,*petsc,i;
589   IS                is;
590   MPI_Comm          comm;
591   PetscViewerFormat format;
592 
593   PetscFunctionBegin;
594   /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
595   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
596   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0);
597 
598   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
599   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
600   if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
601 
602   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
603   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
604   ierr = PetscMalloc1(rend-rstart,&petsc);CHKERRQ(ierr);
605   for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
606   ierr = AOApplicationToPetsc(ao,rend-rstart,petsc);CHKERRQ(ierr);
607   ierr = ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
608 
609   /* call viewer on natural ordering */
610   ierr = MatCreateSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);CHKERRQ(ierr);
611   ierr = ISDestroy(&is);CHKERRQ(ierr);
612   ierr = PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);CHKERRQ(ierr);
613   ierr = PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);CHKERRQ(ierr);
614   ierr = PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);CHKERRQ(ierr);
615   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
616   ierr = MatView(Anatural,viewer);CHKERRQ(ierr);
617   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
618   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
619   PetscFunctionReturn(0);
620 }
621 
622 PetscErrorCode  MatLoad_MPI_DA(Mat A,PetscViewer viewer)
623 {
624   DM             da;
625   PetscErrorCode ierr;
626   Mat            Anatural,Aapp;
627   AO             ao;
628   PetscInt       rstart,rend,*app,i,m,n,M,N;
629   IS             is;
630   MPI_Comm       comm;
631 
632   PetscFunctionBegin;
633   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
634   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
635   if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
636 
637   /* Load the matrix in natural ordering */
638   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Anatural);CHKERRQ(ierr);
639   ierr = MatSetType(Anatural,((PetscObject)A)->type_name);CHKERRQ(ierr);
640   ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr);
641   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
642   ierr = MatSetSizes(Anatural,m,n,M,N);CHKERRQ(ierr);
643   ierr = MatLoad(Anatural,viewer);CHKERRQ(ierr);
644 
645   /* Map natural ordering to application ordering and create IS */
646   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
647   ierr = MatGetOwnershipRange(Anatural,&rstart,&rend);CHKERRQ(ierr);
648   ierr = PetscMalloc1(rend-rstart,&app);CHKERRQ(ierr);
649   for (i=rstart; i<rend; i++) app[i-rstart] = i;
650   ierr = AOPetscToApplication(ao,rend-rstart,app);CHKERRQ(ierr);
651   ierr = ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
652 
653   /* Do permutation and replace header */
654   ierr = MatCreateSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);CHKERRQ(ierr);
655   ierr = MatHeaderReplace(A,&Aapp);CHKERRQ(ierr);
656   ierr = ISDestroy(&is);CHKERRQ(ierr);
657   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
658   PetscFunctionReturn(0);
659 }
660 
661 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
662 {
663   PetscErrorCode ierr;
664   PetscInt       dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
665   Mat            A;
666   MPI_Comm       comm;
667   MatType        Atype;
668   void           (*aij)(void)=NULL,(*baij)(void)=NULL,(*sbaij)(void)=NULL,(*sell)(void)=NULL,(*is)(void)=NULL;
669   MatType        mtype;
670   PetscMPIInt    size;
671   DM_DA          *dd = (DM_DA*)da->data;
672 
673   PetscFunctionBegin;
674   ierr = MatInitializePackage();CHKERRQ(ierr);
675   mtype = da->mattype;
676 
677   /*
678                                   m
679           ------------------------------------------------------
680          |                                                     |
681          |                                                     |
682          |               ----------------------                |
683          |               |                    |                |
684       n  |           ny  |                    |                |
685          |               |                    |                |
686          |               .---------------------                |
687          |             (xs,ys)     nx                          |
688          |            .                                        |
689          |         (gxs,gys)                                   |
690          |                                                     |
691           -----------------------------------------------------
692   */
693 
694   /*
695          nc - number of components per grid point
696          col - number of colors needed in one direction for single component problem
697 
698   */
699   M   = dd->M;
700   N   = dd->N;
701   P   = dd->P;
702   dim = da->dim;
703   dof = dd->w;
704   /* ierr = DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr); */
705   ierr = DMDAGetCorners(da,NULL,NULL,NULL,&nx,&ny,&nz);CHKERRQ(ierr);
706   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
707   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
708   ierr = MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);CHKERRQ(ierr);
709   ierr = MatSetType(A,mtype);CHKERRQ(ierr);
710   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
711   if (dof*nx*ny*nz < da->bind_below) {
712     ierr = MatSetBindingPropagates(A,PETSC_TRUE);CHKERRQ(ierr);
713     ierr = MatBindToCPU(A,PETSC_TRUE);CHKERRQ(ierr);
714   }
715   ierr = MatSetDM(A,da);CHKERRQ(ierr);
716   if (da->structure_only) {
717     ierr = MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE);CHKERRQ(ierr);
718   }
719   ierr = MatGetType(A,&Atype);CHKERRQ(ierr);
720   /*
721      We do not provide a getmatrix function in the DMDA operations because
722    the basic DMDA does not know about matrices. We think of DMDA as being more
723    more low-level than matrices. This is kind of cheating but, cause sometimes
724    we think of DMDA has higher level than matrices.
725 
726      We could switch based on Atype (or mtype), but we do not since the
727    specialized setting routines depend only on the particular preallocation
728    details of the matrix, not the type itself.
729   */
730   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
731   if (!aij) {
732     ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
733   }
734   if (!aij) {
735     ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
736     if (!baij) {
737       ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
738     }
739     if (!baij) {
740       ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
741       if (!sbaij) {
742         ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
743       }
744       if (!sbaij) {
745         ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISELLSetPreallocation_C",&sell);CHKERRQ(ierr);
746         if (!sell) {
747           ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",&sell);CHKERRQ(ierr);
748         }
749       }
750       if (!sell) {
751         ierr = PetscObjectQueryFunction((PetscObject)A,"MatISSetPreallocation_C",&is);CHKERRQ(ierr);
752       }
753     }
754   }
755   if (aij) {
756     if (dim == 1) {
757       if (dd->ofill) {
758         ierr = DMCreateMatrix_DA_1d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
759       } else {
760         DMBoundaryType bx;
761         PetscMPIInt  size;
762         ierr = DMDAGetInfo(da,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&bx,NULL,NULL,NULL);CHKERRQ(ierr);
763         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRMPI(ierr);
764         if (size == 1 && bx == DM_BOUNDARY_NONE) {
765           ierr = DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da,A,PETSC_FALSE);CHKERRQ(ierr);
766         } else {
767           ierr = DMCreateMatrix_DA_1d_MPIAIJ(da,A,PETSC_FALSE);CHKERRQ(ierr);
768         }
769       }
770     } else if (dim == 2) {
771       if (dd->ofill) {
772         ierr = DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
773       } else {
774         ierr = DMCreateMatrix_DA_2d_MPIAIJ(da,A,PETSC_FALSE);CHKERRQ(ierr);
775       }
776     } else if (dim == 3) {
777       if (dd->ofill) {
778         ierr = DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
779       } else {
780         ierr = DMCreateMatrix_DA_3d_MPIAIJ(da,A,PETSC_FALSE);CHKERRQ(ierr);
781       }
782     }
783   } else if (baij) {
784     if (dim == 2) {
785       ierr = DMCreateMatrix_DA_2d_MPIBAIJ(da,A);CHKERRQ(ierr);
786     } else if (dim == 3) {
787       ierr = DMCreateMatrix_DA_3d_MPIBAIJ(da,A);CHKERRQ(ierr);
788     } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
789   } else if (sbaij) {
790     if (dim == 2) {
791       ierr = DMCreateMatrix_DA_2d_MPISBAIJ(da,A);CHKERRQ(ierr);
792     } else if (dim == 3) {
793       ierr = DMCreateMatrix_DA_3d_MPISBAIJ(da,A);CHKERRQ(ierr);
794     } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
795   } else if (sell) {
796      if (dim == 2) {
797        ierr = DMCreateMatrix_DA_2d_MPISELL(da,A);CHKERRQ(ierr);
798      } else if (dim == 3) {
799        ierr = DMCreateMatrix_DA_3d_MPISELL(da,A);CHKERRQ(ierr);
800      } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
801   } else if (is) {
802     ierr = DMCreateMatrix_DA_IS(da,A);CHKERRQ(ierr);
803   } else {
804     ISLocalToGlobalMapping ltog;
805 
806     ierr = MatSetBlockSize(A,dof);CHKERRQ(ierr);
807     ierr = MatSetUp(A);CHKERRQ(ierr);
808     ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
809     ierr = MatSetLocalToGlobalMapping(A,ltog,ltog);CHKERRQ(ierr);
810   }
811   ierr = DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);CHKERRQ(ierr);
812   ierr = MatSetStencil(A,dim,dims,starts,dof);CHKERRQ(ierr);
813   ierr = MatSetDM(A,da);CHKERRQ(ierr);
814   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
815   if (size > 1) {
816     /* change viewer to display matrix in natural ordering */
817     ierr = MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA);CHKERRQ(ierr);
818     ierr = MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA);CHKERRQ(ierr);
819   }
820   *J = A;
821   PetscFunctionReturn(0);
822 }
823 
824 /* ---------------------------------------------------------------------------------*/
825 PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
826 
827 PetscErrorCode DMCreateMatrix_DA_IS(DM dm,Mat J)
828 {
829   DM_DA                  *da = (DM_DA*)dm->data;
830   Mat                    lJ;
831   ISLocalToGlobalMapping ltog;
832   IS                     is_loc_filt, is_glob;
833   const PetscInt         *e_loc,*idx;
834   PetscInt               nel,nen,nv,dof,dim,*gidx,nb;
835   PetscBool              flg;
836   PetscErrorCode         ierr;
837 
838   /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
839      We need to filter the local indices that are represented through the DMDAGetElements decomposition
840      This is because the size of the local matrices in MATIS is the local size of the l2g map */
841   PetscFunctionBegin;
842   dof  = da->w;
843   dim  = dm->dim;
844 
845   ierr = MatSetBlockSize(J,dof);CHKERRQ(ierr);
846 
847   /* get local elements indices in local DMDA numbering */
848   ierr = DMDAGetElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
849   ierr = ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nel*nen,e_loc,PETSC_COPY_VALUES,&is_loc_filt);CHKERRQ(ierr);
850   ierr = DMDARestoreElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr);
851 
852   /* obtain a consistent local ordering for MATIS */
853   ierr = ISSortRemoveDups(is_loc_filt);CHKERRQ(ierr);
854   ierr = ISBlockGetLocalSize(is_loc_filt,&nb);CHKERRQ(ierr);
855   ierr = DMGetLocalToGlobalMapping(dm,&ltog);CHKERRQ(ierr);
856   ierr = ISLocalToGlobalMappingGetSize(ltog,&nv);CHKERRQ(ierr);
857   ierr = PetscMalloc1(PetscMax(nb,nv/dof),&gidx);CHKERRQ(ierr);
858   ierr = ISBlockGetIndices(is_loc_filt,&idx);CHKERRQ(ierr);
859   ierr = ISLocalToGlobalMappingApplyBlock(ltog,nb,idx,gidx);CHKERRQ(ierr);
860   ierr = ISBlockRestoreIndices(is_loc_filt,&idx);CHKERRQ(ierr);
861   ierr = ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nb,gidx,PETSC_USE_POINTER,&is_glob);CHKERRQ(ierr);
862   ierr = ISLocalToGlobalMappingCreateIS(is_glob,&ltog);CHKERRQ(ierr);
863   ierr = ISDestroy(&is_glob);CHKERRQ(ierr);
864   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
865   ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
866 
867   /* We also attach a l2g map to the local matrices to have MatSetValueLocal to work */
868   ierr = MatISGetLocalMat(J,&lJ);CHKERRQ(ierr);
869   ierr = ISLocalToGlobalMappingCreateIS(is_loc_filt,&ltog);CHKERRQ(ierr);
870   ierr = ISDestroy(&is_loc_filt);CHKERRQ(ierr);
871   ierr = ISCreateStride(PetscObjectComm((PetscObject)lJ),nv/dof,0,1,&is_glob);CHKERRQ(ierr);
872   ierr = ISGetIndices(is_glob,&idx);CHKERRQ(ierr);
873   ierr = ISGlobalToLocalMappingApplyBlock(ltog,IS_GTOLM_MASK,nv/dof,idx,&nb,gidx);CHKERRQ(ierr);
874   ierr = ISRestoreIndices(is_glob,&idx);CHKERRQ(ierr);
875   ierr = ISDestroy(&is_glob);CHKERRQ(ierr);
876   ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
877   ierr = ISCreateBlock(PETSC_COMM_SELF,dof,nb,gidx,PETSC_USE_POINTER,&is_loc_filt);CHKERRQ(ierr);
878   ierr = ISLocalToGlobalMappingCreateIS(is_loc_filt,&ltog);CHKERRQ(ierr);
879   ierr = ISDestroy(&is_loc_filt);CHKERRQ(ierr);
880   ierr = MatSetLocalToGlobalMapping(lJ,ltog,ltog);CHKERRQ(ierr);
881   ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
882   ierr = PetscFree(gidx);CHKERRQ(ierr);
883 
884   /* Preallocation (not exact): we reuse the preallocation routines of the assembled version  */
885   flg = dm->prealloc_only;
886   dm->prealloc_only = PETSC_TRUE;
887   switch (dim) {
888   case 1:
889     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr);
890     ierr = DMCreateMatrix_DA_1d_MPIAIJ(dm,J,PETSC_TRUE);CHKERRQ(ierr);
891     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
892     break;
893   case 2:
894     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr);
895     ierr = DMCreateMatrix_DA_2d_MPIAIJ(dm,J,PETSC_TRUE);CHKERRQ(ierr);
896     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
897     break;
898   case 3:
899     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr);
900     ierr = DMCreateMatrix_DA_3d_MPIAIJ(dm,J,PETSC_TRUE);CHKERRQ(ierr);
901     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
902     break;
903   default:
904     SETERRQ1(PetscObjectComm((PetscObject)dm),PETSC_ERR_SUP,"Unhandled dimension %d",dim);
905   }
906   dm->prealloc_only = flg;
907   PetscFunctionReturn(0);
908 }
909 
910 PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da,Mat J)
911 {
912   PetscErrorCode         ierr;
913   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p;
914   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
915   MPI_Comm               comm;
916   PetscScalar            *values;
917   DMBoundaryType         bx,by;
918   ISLocalToGlobalMapping ltog;
919   DMDAStencilType        st;
920 
921   PetscFunctionBegin;
922   /*
923          nc - number of components per grid point
924          col - number of colors needed in one direction for single component problem
925 
926   */
927   ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
928   col  = 2*s + 1;
929   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
930   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
931   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
932 
933   ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr);
934   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
935 
936   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
937   /* determine the matrix preallocation information */
938   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
939   for (i=xs; i<xs+nx; i++) {
940 
941     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
942     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
943 
944     for (j=ys; j<ys+ny; j++) {
945       slot = i - gxs + gnx*(j - gys);
946 
947       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
948       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
949 
950       cnt = 0;
951       for (k=0; k<nc; k++) {
952         for (l=lstart; l<lend+1; l++) {
953           for (p=pstart; p<pend+1; p++) {
954             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
955               cols[cnt++] = k + nc*(slot + gnx*l + p);
956             }
957           }
958         }
959         rows[k] = k + nc*(slot);
960       }
961       ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
962     }
963   }
964   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
965   ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr);
966   ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
967   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
968 
969   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
970 
971   /*
972     For each node in the grid: we get the neighbors in the local (on processor ordering
973     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
974     PETSc ordering.
975   */
976   if (!da->prealloc_only) {
977     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
978     for (i=xs; i<xs+nx; i++) {
979 
980       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
981       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
982 
983       for (j=ys; j<ys+ny; j++) {
984         slot = i - gxs + gnx*(j - gys);
985 
986         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
987         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
988 
989         cnt = 0;
990         for (k=0; k<nc; k++) {
991           for (l=lstart; l<lend+1; l++) {
992             for (p=pstart; p<pend+1; p++) {
993               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
994                 cols[cnt++] = k + nc*(slot + gnx*l + p);
995               }
996             }
997           }
998           rows[k] = k + nc*(slot);
999         }
1000         ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1001       }
1002     }
1003     ierr = PetscFree(values);CHKERRQ(ierr);
1004     /* do not copy values to GPU since they are all zero and not yet needed there */
1005     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1006     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1007     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1008     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1009     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1010   }
1011   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1012   PetscFunctionReturn(0);
1013 }
1014 
1015 PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da,Mat J)
1016 {
1017   PetscErrorCode         ierr;
1018   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1019   PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
1020   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
1021   MPI_Comm               comm;
1022   PetscScalar            *values;
1023   DMBoundaryType         bx,by,bz;
1024   ISLocalToGlobalMapping ltog;
1025   DMDAStencilType        st;
1026 
1027   PetscFunctionBegin;
1028   /*
1029          nc - number of components per grid point
1030          col - number of colors needed in one direction for single component problem
1031 
1032   */
1033   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1034   col  = 2*s + 1;
1035   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1036   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1037   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1038 
1039   ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr);
1040   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1041 
1042   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1043   /* determine the matrix preallocation information */
1044   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1045   for (i=xs; i<xs+nx; i++) {
1046     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1047     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1048     for (j=ys; j<ys+ny; j++) {
1049       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1050       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1051       for (k=zs; k<zs+nz; k++) {
1052         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1053         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1054 
1055         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1056 
1057         cnt = 0;
1058         for (l=0; l<nc; l++) {
1059           for (ii=istart; ii<iend+1; ii++) {
1060             for (jj=jstart; jj<jend+1; jj++) {
1061               for (kk=kstart; kk<kend+1; kk++) {
1062                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1063                   cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1064                 }
1065               }
1066             }
1067           }
1068           rows[l] = l + nc*(slot);
1069         }
1070         ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1071       }
1072     }
1073   }
1074   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1075   ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1076   ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1077   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1078   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1079 
1080   /*
1081     For each node in the grid: we get the neighbors in the local (on processor ordering
1082     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1083     PETSc ordering.
1084   */
1085   if (!da->prealloc_only) {
1086     ierr = PetscCalloc1(col*col*col*nc*nc*nc,&values);CHKERRQ(ierr);
1087     for (i=xs; i<xs+nx; i++) {
1088       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1089       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1090       for (j=ys; j<ys+ny; j++) {
1091         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1092         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1093         for (k=zs; k<zs+nz; k++) {
1094           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1095           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1096 
1097           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1098 
1099           cnt = 0;
1100           for (l=0; l<nc; l++) {
1101             for (ii=istart; ii<iend+1; ii++) {
1102               for (jj=jstart; jj<jend+1; jj++) {
1103                 for (kk=kstart; kk<kend+1; kk++) {
1104                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1105                     cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1106                   }
1107                 }
1108               }
1109             }
1110             rows[l] = l + nc*(slot);
1111           }
1112           ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1113         }
1114       }
1115     }
1116     ierr = PetscFree(values);CHKERRQ(ierr);
1117     /* do not copy values to GPU since they are all zero and not yet needed there */
1118     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1119     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1120     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1121     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1122     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1123   }
1124   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1125   PetscFunctionReturn(0);
1126 }
1127 
1128 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J,PetscBool isIS)
1129 {
1130   PetscErrorCode         ierr;
1131   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,M,N;
1132   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
1133   MPI_Comm               comm;
1134   DMBoundaryType         bx,by;
1135   ISLocalToGlobalMapping ltog,mltog;
1136   DMDAStencilType        st;
1137   PetscBool              removedups = PETSC_FALSE,alreadyboundtocpu = PETSC_TRUE;
1138 
1139   PetscFunctionBegin;
1140   /*
1141          nc - number of components per grid point
1142          col - number of colors needed in one direction for single component problem
1143 
1144   */
1145   ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
1146   if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) {
1147     ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr);
1148   }
1149   col  = 2*s + 1;
1150   /*
1151        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1152        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1153   */
1154   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1155   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1156   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
1157   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
1158   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1159 
1160   ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr);
1161   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1162 
1163   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1164   /* determine the matrix preallocation information */
1165   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
1166   for (i=xs; i<xs+nx; i++) {
1167 
1168     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1169     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1170 
1171     for (j=ys; j<ys+ny; j++) {
1172       slot = i - gxs + gnx*(j - gys);
1173 
1174       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1175       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1176 
1177       cnt = 0;
1178       for (k=0; k<nc; k++) {
1179         for (l=lstart; l<lend+1; l++) {
1180           for (p=pstart; p<pend+1; p++) {
1181             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1182               cols[cnt++] = k + nc*(slot + gnx*l + p);
1183             }
1184           }
1185         }
1186         rows[k] = k + nc*(slot);
1187       }
1188       if (removedups) {
1189         ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1190       } else {
1191         ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1192       }
1193     }
1194   }
1195   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1196   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1197   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1198   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1199   ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
1200   if (!mltog) {
1201     ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1202   }
1203 
1204   /*
1205     For each node in the grid: we get the neighbors in the local (on processor ordering
1206     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1207     PETSc ordering.
1208   */
1209   if (!da->prealloc_only) {
1210     for (i=xs; i<xs+nx; i++) {
1211 
1212       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1213       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1214 
1215       for (j=ys; j<ys+ny; j++) {
1216         slot = i - gxs + gnx*(j - gys);
1217 
1218         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1219         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1220 
1221         cnt = 0;
1222         for (l=lstart; l<lend+1; l++) {
1223           for (p=pstart; p<pend+1; p++) {
1224             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1225               cols[cnt++] = nc*(slot + gnx*l + p);
1226               for (k=1; k<nc; k++) {
1227                 cols[cnt] = 1 + cols[cnt-1];cnt++;
1228               }
1229             }
1230           }
1231         }
1232         for (k=0; k<nc; k++) rows[k] = k + nc*(slot);
1233         ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1234       }
1235     }
1236     /* do not copy values to GPU since they are all zero and not yet needed there */
1237     ierr = MatBoundToCPU(J,&alreadyboundtocpu);CHKERRQ(ierr);
1238     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1239     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1240     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1241     if (!alreadyboundtocpu) {ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);}
1242     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1243     if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) {
1244       ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
1245     }
1246   }
1247   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1248   PetscFunctionReturn(0);
1249 }
1250 
1251 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)
1252 {
1253   PetscErrorCode         ierr;
1254   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1255   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,maxcnt = 0,l,p,M,N;
1256   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
1257   DM_DA                  *dd = (DM_DA*)da->data;
1258   PetscInt               ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
1259   MPI_Comm               comm;
1260   DMBoundaryType         bx,by;
1261   ISLocalToGlobalMapping ltog;
1262   DMDAStencilType        st;
1263   PetscBool              removedups = PETSC_FALSE;
1264 
1265   PetscFunctionBegin;
1266   /*
1267          nc - number of components per grid point
1268          col - number of colors needed in one direction for single component problem
1269 
1270   */
1271   ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
1272   col  = 2*s + 1;
1273   /*
1274        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1275        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1276   */
1277   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1278   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1279   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
1280   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
1281   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1282 
1283   ierr = PetscMalloc1(col*col*nc,&cols);CHKERRQ(ierr);
1284   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1285 
1286   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1287   /* determine the matrix preallocation information */
1288   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
1289   for (i=xs; i<xs+nx; i++) {
1290 
1291     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1292     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1293 
1294     for (j=ys; j<ys+ny; j++) {
1295       slot = i - gxs + gnx*(j - gys);
1296 
1297       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1298       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1299 
1300       for (k=0; k<nc; k++) {
1301         cnt = 0;
1302         for (l=lstart; l<lend+1; l++) {
1303           for (p=pstart; p<pend+1; p++) {
1304             if (l || p) {
1305               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
1306                 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
1307               }
1308             } else {
1309               if (dfill) {
1310                 for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1311               } else {
1312                 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1313               }
1314             }
1315           }
1316         }
1317         row    = k + nc*(slot);
1318         maxcnt = PetscMax(maxcnt,cnt);
1319         if (removedups) {
1320           ierr   = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1321         } else {
1322           ierr   = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1323         }
1324       }
1325     }
1326   }
1327   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1328   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1329   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1330   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1331 
1332   /*
1333     For each node in the grid: we get the neighbors in the local (on processor ordering
1334     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1335     PETSc ordering.
1336   */
1337   if (!da->prealloc_only) {
1338     for (i=xs; i<xs+nx; i++) {
1339 
1340       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1341       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1342 
1343       for (j=ys; j<ys+ny; j++) {
1344         slot = i - gxs + gnx*(j - gys);
1345 
1346         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1347         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1348 
1349         for (k=0; k<nc; k++) {
1350           cnt = 0;
1351           for (l=lstart; l<lend+1; l++) {
1352             for (p=pstart; p<pend+1; p++) {
1353               if (l || p) {
1354                 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
1355                   for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
1356                 }
1357               } else {
1358                 if (dfill) {
1359                   for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1360                 } else {
1361                   for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1362                 }
1363               }
1364             }
1365           }
1366           row  = k + nc*(slot);
1367           ierr = MatSetValuesLocal(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1368         }
1369       }
1370     }
1371     /* do not copy values to GPU since they are all zero and not yet needed there */
1372     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1373     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1374     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1375     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1376     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1377   }
1378   ierr = PetscFree(cols);CHKERRQ(ierr);
1379   PetscFunctionReturn(0);
1380 }
1381 
1382 /* ---------------------------------------------------------------------------------*/
1383 
1384 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J,PetscBool isIS)
1385 {
1386   PetscErrorCode         ierr;
1387   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1388   PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
1389   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
1390   MPI_Comm               comm;
1391   DMBoundaryType         bx,by,bz;
1392   ISLocalToGlobalMapping ltog,mltog;
1393   DMDAStencilType        st;
1394   PetscBool              removedups = PETSC_FALSE;
1395 
1396   PetscFunctionBegin;
1397   /*
1398          nc - number of components per grid point
1399          col - number of colors needed in one direction for single component problem
1400 
1401   */
1402   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1403   if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) {
1404     ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr);
1405   }
1406   col  = 2*s + 1;
1407 
1408   /*
1409        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1410        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1411   */
1412   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1413   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1414   if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;
1415 
1416   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1417   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1418   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1419 
1420   ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr);
1421   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1422 
1423   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1424   /* determine the matrix preallocation information */
1425   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1426   for (i=xs; i<xs+nx; i++) {
1427     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1428     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1429     for (j=ys; j<ys+ny; j++) {
1430       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1431       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1432       for (k=zs; k<zs+nz; k++) {
1433         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1434         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1435 
1436         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1437 
1438         cnt = 0;
1439         for (l=0; l<nc; l++) {
1440           for (ii=istart; ii<iend+1; ii++) {
1441             for (jj=jstart; jj<jend+1; jj++) {
1442               for (kk=kstart; kk<kend+1; kk++) {
1443                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1444                   cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1445                 }
1446               }
1447             }
1448           }
1449           rows[l] = l + nc*(slot);
1450         }
1451         if (removedups) {
1452           ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1453         } else {
1454           ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1455         }
1456       }
1457     }
1458   }
1459   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1460   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1461   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1462   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1463   ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
1464   if (!mltog) {
1465     ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1466   }
1467 
1468   /*
1469     For each node in the grid: we get the neighbors in the local (on processor ordering
1470     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1471     PETSc ordering.
1472   */
1473   if (!da->prealloc_only) {
1474     for (i=xs; i<xs+nx; i++) {
1475       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1476       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1477       for (j=ys; j<ys+ny; j++) {
1478         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1479         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1480         for (k=zs; k<zs+nz; k++) {
1481           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1482           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1483 
1484           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1485 
1486           cnt = 0;
1487           for (kk=kstart; kk<kend+1; kk++) {
1488             for (jj=jstart; jj<jend+1; jj++) {
1489               for (ii=istart; ii<iend+1; ii++) {
1490                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1491                   cols[cnt++] = nc*(slot + ii + gnx*jj + gnx*gny*kk);
1492                     for (l=1; l<nc; l++) {
1493                       cols[cnt] = 1 + cols[cnt-1];cnt++;
1494                   }
1495                 }
1496               }
1497             }
1498           }
1499           rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
1500           ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1501         }
1502       }
1503     }
1504     /* do not copy values to GPU since they are all zero and not yet needed there */
1505     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1506     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1507     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1508     if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) {
1509       ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
1510     }
1511     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1512     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1513   }
1514   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1515   PetscFunctionReturn(0);
1516 }
1517 
1518 /* ---------------------------------------------------------------------------------*/
1519 
1520 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J)
1521 {
1522   PetscErrorCode         ierr;
1523   DM_DA                  *dd = (DM_DA*)da->data;
1524   PetscInt               xs,nx,i,j,gxs,gnx,row,k,l;
1525   PetscInt               m,dim,s,*cols = NULL,nc,cnt,maxcnt = 0,*ocols;
1526   PetscInt               *ofill = dd->ofill,*dfill = dd->dfill;
1527   DMBoundaryType         bx;
1528   ISLocalToGlobalMapping ltog;
1529   PetscMPIInt            rank,size;
1530 
1531   PetscFunctionBegin;
1532   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)da),&rank);CHKERRMPI(ierr);
1533   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRMPI(ierr);
1534 
1535   /*
1536          nc - number of components per grid point
1537 
1538   */
1539   ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr);
1540   if (s > 1) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Matrix creation for 1d not implemented correctly for stencil width larger than 1");
1541   ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr);
1542   ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr);
1543 
1544   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1545   ierr = PetscCalloc2(nx*nc,&cols,nx*nc,&ocols);CHKERRQ(ierr);
1546 
1547   /*
1548         note should be smaller for first and last process with no periodic
1549         does not handle dfill
1550   */
1551   cnt = 0;
1552   /* coupling with process to the left */
1553   for (i=0; i<s; i++) {
1554     for (j=0; j<nc; j++) {
1555       ocols[cnt] = ((rank == 0) ? 0 : (s - i)*(ofill[j+1] - ofill[j]));
1556       cols[cnt]  = dfill[j+1] - dfill[j] + (s + i)*(ofill[j+1] - ofill[j]);
1557       if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1558         if (size > 1) ocols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
1559         else cols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
1560       }
1561       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1562       cnt++;
1563     }
1564   }
1565   for (i=s; i<nx-s; i++) {
1566     for (j=0; j<nc; j++) {
1567       cols[cnt] = dfill[j+1] - dfill[j] + 2*s*(ofill[j+1] - ofill[j]);
1568       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1569       cnt++;
1570     }
1571   }
1572   /* coupling with process to the right */
1573   for (i=nx-s; i<nx; i++) {
1574     for (j=0; j<nc; j++) {
1575       ocols[cnt] = ((rank == (size-1)) ? 0 : (i - nx + s + 1)*(ofill[j+1] - ofill[j]));
1576       cols[cnt]  = dfill[j+1] - dfill[j] + (s + nx - i - 1)*(ofill[j+1] - ofill[j]);
1577       if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1578         if (size > 1) ocols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
1579         else cols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
1580       }
1581       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1582       cnt++;
1583     }
1584   }
1585 
1586   ierr = MatSeqAIJSetPreallocation(J,0,cols);CHKERRQ(ierr);
1587   ierr = MatMPIAIJSetPreallocation(J,0,cols,0,ocols);CHKERRQ(ierr);
1588   ierr = PetscFree2(cols,ocols);CHKERRQ(ierr);
1589 
1590   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1591   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1592 
1593   /*
1594     For each node in the grid: we get the neighbors in the local (on processor ordering
1595     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1596     PETSc ordering.
1597   */
1598   if (!da->prealloc_only) {
1599     ierr = PetscMalloc1(maxcnt,&cols);CHKERRQ(ierr);
1600     row = xs*nc;
1601     /* coupling with process to the left */
1602     for (i=xs; i<xs+s; i++) {
1603       for (j=0; j<nc; j++) {
1604         cnt = 0;
1605         if (rank) {
1606           for (l=0; l<s; l++) {
1607             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1608           }
1609         }
1610         if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1611           for (l=0; l<s; l++) {
1612             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (m + i - s - l)*nc + ofill[k];
1613           }
1614         }
1615         if (dfill) {
1616           for (k=dfill[j]; k<dfill[j+1]; k++) {
1617             cols[cnt++] = i*nc + dfill[k];
1618           }
1619         } else {
1620           for (k=0; k<nc; k++) {
1621             cols[cnt++] = i*nc + k;
1622           }
1623         }
1624         for (l=0; l<s; l++) {
1625           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1626         }
1627         ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1628         row++;
1629       }
1630     }
1631     for (i=xs+s; i<xs+nx-s; i++) {
1632       for (j=0; j<nc; j++) {
1633         cnt = 0;
1634         for (l=0; l<s; l++) {
1635           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1636         }
1637         if (dfill) {
1638           for (k=dfill[j]; k<dfill[j+1]; k++) {
1639             cols[cnt++] = i*nc + dfill[k];
1640           }
1641         } else {
1642           for (k=0; k<nc; k++) {
1643             cols[cnt++] = i*nc + k;
1644           }
1645         }
1646         for (l=0; l<s; l++) {
1647           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1648         }
1649         ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1650         row++;
1651       }
1652     }
1653     /* coupling with process to the right */
1654     for (i=xs+nx-s; i<xs+nx; i++) {
1655       for (j=0; j<nc; j++) {
1656         cnt = 0;
1657         for (l=0; l<s; l++) {
1658           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1659         }
1660         if (dfill) {
1661           for (k=dfill[j]; k<dfill[j+1]; k++) {
1662             cols[cnt++] = i*nc + dfill[k];
1663           }
1664         } else {
1665           for (k=0; k<nc; k++) {
1666             cols[cnt++] = i*nc + k;
1667           }
1668         }
1669         if (rank < size-1) {
1670           for (l=0; l<s; l++) {
1671             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1672           }
1673         }
1674         if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1675           for (l=0; l<s; l++) {
1676             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s - l - m + 2)*nc + ofill[k];
1677           }
1678         }
1679         ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1680         row++;
1681       }
1682     }
1683     ierr = PetscFree(cols);CHKERRQ(ierr);
1684     /* do not copy values to GPU since they are all zero and not yet needed there */
1685     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1686     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1687     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1688     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1689     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1690   }
1691   PetscFunctionReturn(0);
1692 }
1693 
1694 /* ---------------------------------------------------------------------------------*/
1695 
1696 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J,PetscBool isIS)
1697 {
1698   PetscErrorCode         ierr;
1699   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1700   PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
1701   PetscInt               istart,iend;
1702   DMBoundaryType         bx;
1703   ISLocalToGlobalMapping ltog,mltog;
1704 
1705   PetscFunctionBegin;
1706   /*
1707          nc - number of components per grid point
1708          col - number of colors needed in one direction for single component problem
1709 
1710   */
1711   ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr);
1712   if (!isIS && bx == DM_BOUNDARY_NONE) {
1713     ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr);
1714   }
1715   col  = 2*s + 1;
1716 
1717   ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr);
1718   ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr);
1719 
1720   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1721   ierr = MatSeqAIJSetPreallocation(J,col*nc,NULL);CHKERRQ(ierr);
1722   ierr = MatMPIAIJSetPreallocation(J,col*nc,NULL,col*nc,NULL);CHKERRQ(ierr);
1723 
1724   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1725   ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
1726   if (!mltog) {
1727     ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1728   }
1729 
1730   /*
1731     For each node in the grid: we get the neighbors in the local (on processor ordering
1732     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1733     PETSc ordering.
1734   */
1735   if (!da->prealloc_only) {
1736     ierr = PetscMalloc2(nc,&rows,col*nc*nc,&cols);CHKERRQ(ierr);
1737     for (i=xs; i<xs+nx; i++) {
1738       istart = PetscMax(-s,gxs - i);
1739       iend   = PetscMin(s,gxs + gnx - i - 1);
1740       slot   = i - gxs;
1741 
1742       cnt = 0;
1743       for (i1=istart; i1<iend+1; i1++) {
1744         cols[cnt++] = nc*(slot + i1);
1745         for (l=1; l<nc; l++) {
1746           cols[cnt] = 1 + cols[cnt-1];cnt++;
1747         }
1748       }
1749       rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
1750       ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1751     }
1752     /* do not copy values to GPU since they are all zero and not yet needed there */
1753     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1754     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1755     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1756     if (!isIS && bx == DM_BOUNDARY_NONE) {
1757       ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
1758     }
1759     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1760     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1761     ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1762   }
1763   PetscFunctionReturn(0);
1764 }
1765 
1766 /* ---------------------------------------------------------------------------------*/
1767 
1768 PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da,Mat J,PetscBool isIS)
1769 {
1770   PetscErrorCode         ierr;
1771   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1772   PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
1773   PetscInt               istart,iend;
1774   DMBoundaryType         bx;
1775   ISLocalToGlobalMapping ltog,mltog;
1776 
1777   PetscFunctionBegin;
1778   /*
1779          nc - number of components per grid point
1780          col - number of colors needed in one direction for single component problem
1781   */
1782   ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr);
1783   col  = 2*s + 1;
1784 
1785   ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr);
1786   ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr);
1787 
1788   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1789   ierr = MatSeqAIJSetTotalPreallocation(J,nx*nc*col*nc);CHKERRQ(ierr);
1790 
1791   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1792   ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
1793   if (!mltog) {
1794     ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1795   }
1796 
1797   /*
1798     For each node in the grid: we get the neighbors in the local (on processor ordering
1799     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1800     PETSc ordering.
1801   */
1802   if (!da->prealloc_only) {
1803     ierr = PetscMalloc2(nc,&rows,col*nc*nc,&cols);CHKERRQ(ierr);
1804     for (i=xs; i<xs+nx; i++) {
1805       istart = PetscMax(-s,gxs - i);
1806       iend   = PetscMin(s,gxs + gnx - i - 1);
1807       slot   = i - gxs;
1808 
1809       cnt = 0;
1810       for (i1=istart; i1<iend+1; i1++) {
1811         cols[cnt++] = nc*(slot + i1);
1812         for (l=1; l<nc; l++) {
1813           cols[cnt] = 1 + cols[cnt-1];cnt++;
1814         }
1815       }
1816       rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
1817       ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1818     }
1819     /* do not copy values to GPU since they are all zero and not yet needed there */
1820     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1821     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1822     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1823     if (!isIS && bx == DM_BOUNDARY_NONE) {
1824       ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
1825     }
1826     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1827     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1828     ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1829   }
1830   ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
1831   PetscFunctionReturn(0);
1832 }
1833 
1834 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
1835 {
1836   PetscErrorCode         ierr;
1837   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1838   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1839   PetscInt               istart,iend,jstart,jend,ii,jj;
1840   MPI_Comm               comm;
1841   PetscScalar            *values;
1842   DMBoundaryType         bx,by;
1843   DMDAStencilType        st;
1844   ISLocalToGlobalMapping ltog;
1845 
1846   PetscFunctionBegin;
1847   /*
1848      nc - number of components per grid point
1849      col - number of colors needed in one direction for single component problem
1850   */
1851   ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
1852   col  = 2*s + 1;
1853 
1854   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
1855   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
1856   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1857 
1858   ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr);
1859 
1860   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1861 
1862   /* determine the matrix preallocation information */
1863   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1864   for (i=xs; i<xs+nx; i++) {
1865     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1866     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1867     for (j=ys; j<ys+ny; j++) {
1868       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1869       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1870       slot   = i - gxs + gnx*(j - gys);
1871 
1872       /* Find block columns in block row */
1873       cnt = 0;
1874       for (ii=istart; ii<iend+1; ii++) {
1875         for (jj=jstart; jj<jend+1; jj++) {
1876           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1877             cols[cnt++] = slot + ii + gnx*jj;
1878           }
1879         }
1880       }
1881       ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1882     }
1883   }
1884   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1885   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1886   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1887 
1888   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1889 
1890   /*
1891     For each node in the grid: we get the neighbors in the local (on processor ordering
1892     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1893     PETSc ordering.
1894   */
1895   if (!da->prealloc_only) {
1896     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
1897     for (i=xs; i<xs+nx; i++) {
1898       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1899       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1900       for (j=ys; j<ys+ny; j++) {
1901         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1902         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1903         slot = i - gxs + gnx*(j - gys);
1904         cnt  = 0;
1905         for (ii=istart; ii<iend+1; ii++) {
1906           for (jj=jstart; jj<jend+1; jj++) {
1907             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1908               cols[cnt++] = slot + ii + gnx*jj;
1909             }
1910           }
1911         }
1912         ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1913       }
1914     }
1915     ierr = PetscFree(values);CHKERRQ(ierr);
1916     /* do not copy values to GPU since they are all zero and not yet needed there */
1917     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1918     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1919     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1920     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1921     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1922   }
1923   ierr = PetscFree(cols);CHKERRQ(ierr);
1924   PetscFunctionReturn(0);
1925 }
1926 
1927 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
1928 {
1929   PetscErrorCode         ierr;
1930   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1931   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1932   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1933   MPI_Comm               comm;
1934   PetscScalar            *values;
1935   DMBoundaryType         bx,by,bz;
1936   DMDAStencilType        st;
1937   ISLocalToGlobalMapping ltog;
1938 
1939   PetscFunctionBegin;
1940   /*
1941          nc - number of components per grid point
1942          col - number of colors needed in one direction for single component problem
1943 
1944   */
1945   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1946   col  = 2*s + 1;
1947 
1948   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1949   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1950   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1951 
1952   ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr);
1953 
1954   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1955 
1956   /* determine the matrix preallocation information */
1957   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1958   for (i=xs; i<xs+nx; i++) {
1959     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1960     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1961     for (j=ys; j<ys+ny; j++) {
1962       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1963       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1964       for (k=zs; k<zs+nz; k++) {
1965         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1966         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1967 
1968         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1969 
1970         /* Find block columns in block row */
1971         cnt = 0;
1972         for (ii=istart; ii<iend+1; ii++) {
1973           for (jj=jstart; jj<jend+1; jj++) {
1974             for (kk=kstart; kk<kend+1; kk++) {
1975               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1976                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1977               }
1978             }
1979           }
1980         }
1981         ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1982       }
1983     }
1984   }
1985   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1986   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1987   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1988 
1989   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1990 
1991   /*
1992     For each node in the grid: we get the neighbors in the local (on processor ordering
1993     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1994     PETSc ordering.
1995   */
1996   if (!da->prealloc_only) {
1997     ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr);
1998     for (i=xs; i<xs+nx; i++) {
1999       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2000       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2001       for (j=ys; j<ys+ny; j++) {
2002         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2003         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2004         for (k=zs; k<zs+nz; k++) {
2005           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2006           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2007 
2008           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2009 
2010           cnt = 0;
2011           for (ii=istart; ii<iend+1; ii++) {
2012             for (jj=jstart; jj<jend+1; jj++) {
2013               for (kk=kstart; kk<kend+1; kk++) {
2014                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2015                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
2016                 }
2017               }
2018             }
2019           }
2020           ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2021         }
2022       }
2023     }
2024     ierr = PetscFree(values);CHKERRQ(ierr);
2025     /* do not copy values to GPU since they are all zero and not yet needed there */
2026     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
2027     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2028     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2029     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
2030     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2031   }
2032   ierr = PetscFree(cols);CHKERRQ(ierr);
2033   PetscFunctionReturn(0);
2034 }
2035 
2036 /*
2037   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
2038   identify in the local ordering with periodic domain.
2039 */
2040 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[])
2041 {
2042   PetscErrorCode ierr;
2043   PetscInt       i,n;
2044 
2045   PetscFunctionBegin;
2046   ierr = ISLocalToGlobalMappingApplyBlock(ltog,1,row,row);CHKERRQ(ierr);
2047   ierr = ISLocalToGlobalMappingApplyBlock(ltog,*cnt,col,col);CHKERRQ(ierr);
2048   for (i=0,n=0; i<*cnt; i++) {
2049     if (col[i] >= *row) col[n++] = col[i];
2050   }
2051   *cnt = n;
2052   PetscFunctionReturn(0);
2053 }
2054 
2055 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J)
2056 {
2057   PetscErrorCode         ierr;
2058   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2059   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
2060   PetscInt               istart,iend,jstart,jend,ii,jj;
2061   MPI_Comm               comm;
2062   PetscScalar            *values;
2063   DMBoundaryType         bx,by;
2064   DMDAStencilType        st;
2065   ISLocalToGlobalMapping ltog;
2066 
2067   PetscFunctionBegin;
2068   /*
2069      nc - number of components per grid point
2070      col - number of colors needed in one direction for single component problem
2071   */
2072   ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
2073   col  = 2*s + 1;
2074 
2075   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
2076   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
2077   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
2078 
2079   ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr);
2080 
2081   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
2082 
2083   /* determine the matrix preallocation information */
2084   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
2085   for (i=xs; i<xs+nx; i++) {
2086     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2087     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2088     for (j=ys; j<ys+ny; j++) {
2089       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2090       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2091       slot   = i - gxs + gnx*(j - gys);
2092 
2093       /* Find block columns in block row */
2094       cnt = 0;
2095       for (ii=istart; ii<iend+1; ii++) {
2096         for (jj=jstart; jj<jend+1; jj++) {
2097           if (st == DMDA_STENCIL_BOX || !ii || !jj) {
2098             cols[cnt++] = slot + ii + gnx*jj;
2099           }
2100         }
2101       }
2102       ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2103       ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
2104     }
2105   }
2106   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
2107   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
2108   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2109 
2110   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
2111 
2112   /*
2113     For each node in the grid: we get the neighbors in the local (on processor ordering
2114     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2115     PETSc ordering.
2116   */
2117   if (!da->prealloc_only) {
2118     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
2119     for (i=xs; i<xs+nx; i++) {
2120       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2121       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2122       for (j=ys; j<ys+ny; j++) {
2123         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2124         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2125         slot   = i - gxs + gnx*(j - gys);
2126 
2127         /* Find block columns in block row */
2128         cnt = 0;
2129         for (ii=istart; ii<iend+1; ii++) {
2130           for (jj=jstart; jj<jend+1; jj++) {
2131             if (st == DMDA_STENCIL_BOX || !ii || !jj) {
2132               cols[cnt++] = slot + ii + gnx*jj;
2133             }
2134           }
2135         }
2136         ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2137         ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2138       }
2139     }
2140     ierr = PetscFree(values);CHKERRQ(ierr);
2141     /* do not copy values to GPU since they are all zero and not yet needed there */
2142     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
2143     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2144     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2145     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
2146     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2147   }
2148   ierr = PetscFree(cols);CHKERRQ(ierr);
2149   PetscFunctionReturn(0);
2150 }
2151 
2152 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J)
2153 {
2154   PetscErrorCode         ierr;
2155   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2156   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
2157   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
2158   MPI_Comm               comm;
2159   PetscScalar            *values;
2160   DMBoundaryType         bx,by,bz;
2161   DMDAStencilType        st;
2162   ISLocalToGlobalMapping ltog;
2163 
2164   PetscFunctionBegin;
2165   /*
2166      nc - number of components per grid point
2167      col - number of colors needed in one direction for single component problem
2168   */
2169   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
2170   col  = 2*s + 1;
2171 
2172   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
2173   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
2174   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
2175 
2176   /* create the matrix */
2177   ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr);
2178 
2179   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
2180 
2181   /* determine the matrix preallocation information */
2182   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
2183   for (i=xs; i<xs+nx; i++) {
2184     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2185     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2186     for (j=ys; j<ys+ny; j++) {
2187       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2188       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2189       for (k=zs; k<zs+nz; k++) {
2190         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2191         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2192 
2193         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2194 
2195         /* Find block columns in block row */
2196         cnt = 0;
2197         for (ii=istart; ii<iend+1; ii++) {
2198           for (jj=jstart; jj<jend+1; jj++) {
2199             for (kk=kstart; kk<kend+1; kk++) {
2200               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
2201                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
2202               }
2203             }
2204           }
2205         }
2206         ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2207         ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
2208       }
2209     }
2210   }
2211   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
2212   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
2213   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2214 
2215   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
2216 
2217   /*
2218     For each node in the grid: we get the neighbors in the local (on processor ordering
2219     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2220     PETSc ordering.
2221   */
2222   if (!da->prealloc_only) {
2223     ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr);
2224     for (i=xs; i<xs+nx; i++) {
2225       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2226       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2227       for (j=ys; j<ys+ny; j++) {
2228         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2229         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2230         for (k=zs; k<zs+nz; k++) {
2231           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2232           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2233 
2234           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2235 
2236           cnt = 0;
2237           for (ii=istart; ii<iend+1; ii++) {
2238             for (jj=jstart; jj<jend+1; jj++) {
2239               for (kk=kstart; kk<kend+1; kk++) {
2240                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
2241                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
2242                 }
2243               }
2244             }
2245           }
2246           ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2247           ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2248         }
2249       }
2250     }
2251     ierr = PetscFree(values);CHKERRQ(ierr);
2252     /* do not copy values to GPU since they are all zero and not yet needed there */
2253     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
2254     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2255     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2256     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
2257     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2258   }
2259   ierr = PetscFree(cols);CHKERRQ(ierr);
2260   PetscFunctionReturn(0);
2261 }
2262 
2263 /* ---------------------------------------------------------------------------------*/
2264 
2265 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J)
2266 {
2267   PetscErrorCode         ierr;
2268   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2269   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt, maxcnt = 0,l,p,*dnz,*onz;
2270   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
2271   DM_DA                  *dd = (DM_DA*)da->data;
2272   PetscInt               ifill_col,*dfill = dd->dfill,*ofill = dd->ofill;
2273   MPI_Comm               comm;
2274   PetscScalar            *values;
2275   DMBoundaryType         bx,by,bz;
2276   ISLocalToGlobalMapping ltog;
2277   DMDAStencilType        st;
2278   PetscBool              removedups = PETSC_FALSE;
2279 
2280   PetscFunctionBegin;
2281   /*
2282          nc - number of components per grid point
2283          col - number of colors needed in one direction for single component problem
2284 
2285   */
2286   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
2287   col  = 2*s + 1;
2288   if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
2289                  by 2*stencil_width + 1\n");
2290   if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
2291                  by 2*stencil_width + 1\n");
2292   if (bz == DM_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
2293                  by 2*stencil_width + 1\n");
2294 
2295   /*
2296        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
2297        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
2298   */
2299   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
2300   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
2301   if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;
2302 
2303   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
2304   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
2305   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
2306 
2307   ierr = PetscMalloc1(col*col*col*nc,&cols);CHKERRQ(ierr);
2308   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
2309 
2310   /* determine the matrix preallocation information */
2311   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
2312 
2313   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
2314   for (i=xs; i<xs+nx; i++) {
2315     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2316     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2317     for (j=ys; j<ys+ny; j++) {
2318       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2319       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2320       for (k=zs; k<zs+nz; k++) {
2321         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2322         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2323 
2324         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2325 
2326         for (l=0; l<nc; l++) {
2327           cnt = 0;
2328           for (ii=istart; ii<iend+1; ii++) {
2329             for (jj=jstart; jj<jend+1; jj++) {
2330               for (kk=kstart; kk<kend+1; kk++) {
2331                 if (ii || jj || kk) {
2332                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2333                     for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2334                   }
2335                 } else {
2336                   if (dfill) {
2337                     for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2338                   } else {
2339                     for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2340                   }
2341                 }
2342               }
2343             }
2344           }
2345           row  = l + nc*(slot);
2346           maxcnt = PetscMax(maxcnt,cnt);
2347           if (removedups) {
2348             ierr = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
2349           } else {
2350             ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
2351           }
2352         }
2353       }
2354     }
2355   }
2356   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
2357   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
2358   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2359   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
2360 
2361   /*
2362     For each node in the grid: we get the neighbors in the local (on processor ordering
2363     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2364     PETSc ordering.
2365   */
2366   if (!da->prealloc_only) {
2367     ierr = PetscCalloc1(maxcnt,&values);CHKERRQ(ierr);
2368     for (i=xs; i<xs+nx; i++) {
2369       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2370       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2371       for (j=ys; j<ys+ny; j++) {
2372         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2373         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2374         for (k=zs; k<zs+nz; k++) {
2375           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2376           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2377 
2378           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2379 
2380           for (l=0; l<nc; l++) {
2381             cnt = 0;
2382             for (ii=istart; ii<iend+1; ii++) {
2383               for (jj=jstart; jj<jend+1; jj++) {
2384                 for (kk=kstart; kk<kend+1; kk++) {
2385                   if (ii || jj || kk) {
2386                     if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2387                       for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2388                     }
2389                   } else {
2390                     if (dfill) {
2391                       for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2392                     } else {
2393                       for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2394                     }
2395                   }
2396                 }
2397               }
2398             }
2399             row  = l + nc*(slot);
2400             ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2401           }
2402         }
2403       }
2404     }
2405     ierr = PetscFree(values);CHKERRQ(ierr);
2406     /* do not copy values to GPU since they are all zero and not yet needed there */
2407     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
2408     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2409     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2410     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
2411     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2412   }
2413   ierr = PetscFree(cols);CHKERRQ(ierr);
2414   PetscFunctionReturn(0);
2415 }
2416