xref: /petsc/src/dm/impls/da/fdda.c (revision b698fc57f0bea7237255b29c1b77df0acc362ffd)
1 
2 #include <petsc/private/dmdaimpl.h> /*I      "petscdmda.h"     I*/
3 #include <petscmat.h>
4 
5 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring*);
6 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring*);
7 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring*);
8 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring*);
9 
10 /*
11    For ghost i that may be negative or greater than the upper bound this
12   maps it into the 0:m-1 range using periodicity
13 */
14 #define SetInRange(i,m) ((i < 0) ? m+i : ((i >= m) ? i-m : i))
15 
16 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill,PetscInt w,PetscInt **rfill)
17 {
18   PetscErrorCode ierr;
19   PetscInt       i,j,nz,*fill;
20 
21   PetscFunctionBegin;
22   if (!dfill) PetscFunctionReturn(0);
23 
24   /* count number nonzeros */
25   nz = 0;
26   for (i=0; i<w; i++) {
27     for (j=0; j<w; j++) {
28       if (dfill[w*i+j]) nz++;
29     }
30   }
31   ierr = PetscMalloc1(nz + w + 1,&fill);CHKERRQ(ierr);
32   /* construct modified CSR storage of nonzero structure */
33   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
34    so fill[1] - fill[0] gives number of nonzeros in first row etc */
35   nz = w + 1;
36   for (i=0; i<w; i++) {
37     fill[i] = nz;
38     for (j=0; j<w; j++) {
39       if (dfill[w*i+j]) {
40         fill[nz] = j;
41         nz++;
42       }
43     }
44   }
45   fill[w] = nz;
46 
47   *rfill = fill;
48   PetscFunctionReturn(0);
49 }
50 
51 
52 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse,PetscInt w,PetscInt **rfill)
53 {
54   PetscErrorCode ierr;
55   PetscInt       nz;
56 
57   PetscFunctionBegin;
58   if (!dfillsparse) PetscFunctionReturn(0);
59 
60   /* Determine number of non-zeros */
61   nz = (dfillsparse[w] - w - 1);
62 
63   /* Allocate space for our copy of the given sparse matrix representation. */
64   ierr = PetscMalloc1(nz + w + 1,rfill);CHKERRQ(ierr);
65   ierr = PetscArraycpy(*rfill,dfillsparse,nz+w+1);CHKERRQ(ierr);
66   PetscFunctionReturn(0);
67 }
68 
69 
70 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
71 {
72   PetscErrorCode ierr;
73   PetscInt       i,k,cnt = 1;
74 
75   PetscFunctionBegin;
76 
77   /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of
78    columns to the left with any nonzeros in them plus 1 */
79   ierr = PetscCalloc1(dd->w,&dd->ofillcols);CHKERRQ(ierr);
80   for (i=0; i<dd->w; i++) {
81     for (k=dd->ofill[i]; k<dd->ofill[i+1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
82   }
83   for (i=0; i<dd->w; i++) {
84     if (dd->ofillcols[i]) {
85       dd->ofillcols[i] = cnt++;
86     }
87   }
88   PetscFunctionReturn(0);
89 }
90 
91 
92 
93 /*@
94     DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
95     of the matrix returned by DMCreateMatrix().
96 
97     Logically Collective on da
98 
99     Input Parameter:
100 +   da - the distributed array
101 .   dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
102 -   ofill - the fill pattern in the off-diagonal blocks
103 
104 
105     Level: developer
106 
107     Notes:
108     This only makes sense when you are doing multicomponent problems but using the
109        MPIAIJ matrix format
110 
111            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
112        representing coupling and 0 entries for missing coupling. For example
113 $             dfill[9] = {1, 0, 0,
114 $                         1, 1, 0,
115 $                         0, 1, 1}
116        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
117        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
118        diagonal block).
119 
120      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
121      can be represented in the dfill, ofill format
122 
123    Contributed by Glenn Hammond
124 
125 .seealso DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()
126 
127 @*/
128 PetscErrorCode  DMDASetBlockFills(DM da,const PetscInt *dfill,const PetscInt *ofill)
129 {
130   DM_DA          *dd = (DM_DA*)da->data;
131   PetscErrorCode ierr;
132 
133   PetscFunctionBegin;
134   /* save the given dfill and ofill information */
135   ierr = DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);CHKERRQ(ierr);
136   ierr = DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);CHKERRQ(ierr);
137 
138   /* count nonzeros in ofill columns */
139   ierr = DMDASetBlockFills_Private2(dd);CHKERRQ(ierr);
140 
141   PetscFunctionReturn(0);
142 }
143 
144 
145 /*@
146     DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
147     of the matrix returned by DMCreateMatrix(), using sparse representations
148     of fill patterns.
149 
150     Logically Collective on da
151 
152     Input Parameter:
153 +   da - the distributed array
154 .   dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block)
155 -   ofill - the sparse fill pattern in the off-diagonal blocks
156 
157 
158     Level: developer
159 
160     Notes: This only makes sense when you are doing multicomponent problems but using the
161        MPIAIJ matrix format
162 
163            The format for dfill and ofill is a sparse representation of a
164            dof-by-dof matrix with 1 entries representing coupling and 0 entries
165            for missing coupling.  The sparse representation is a 1 dimensional
166            array of length nz + dof + 1, where nz is the number of non-zeros in
167            the matrix.  The first dof entries in the array give the
168            starting array indices of each row's items in the rest of the array,
169            the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
170            and the remaining nz items give the column indices of each of
171            the 1s within the logical 2D matrix.  Each row's items within
172            the array are the column indices of the 1s within that row
173            of the 2D matrix.  PETSc developers may recognize that this is the
174            same format as that computed by the DMDASetBlockFills_Private()
175            function from a dense 2D matrix representation.
176 
177      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
178      can be represented in the dfill, ofill format
179 
180    Contributed by Philip C. Roth
181 
182 .seealso DMDASetBlockFills(), DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()
183 
184 @*/
185 PetscErrorCode  DMDASetBlockFillsSparse(DM da,const PetscInt *dfillsparse,const PetscInt *ofillsparse)
186 {
187   DM_DA          *dd = (DM_DA*)da->data;
188   PetscErrorCode ierr;
189 
190   PetscFunctionBegin;
191   /* save the given dfill and ofill information */
192   ierr = DMDASetBlockFillsSparse_Private(dfillsparse,dd->w,&dd->dfill);CHKERRQ(ierr);
193   ierr = DMDASetBlockFillsSparse_Private(ofillsparse,dd->w,&dd->ofill);CHKERRQ(ierr);
194 
195   /* count nonzeros in ofill columns */
196   ierr = DMDASetBlockFills_Private2(dd);CHKERRQ(ierr);
197 
198   PetscFunctionReturn(0);
199 }
200 
201 
202 PetscErrorCode  DMCreateColoring_DA(DM da,ISColoringType ctype,ISColoring *coloring)
203 {
204   PetscErrorCode   ierr;
205   PetscInt         dim,m,n,p,nc;
206   DMBoundaryType   bx,by,bz;
207   MPI_Comm         comm;
208   PetscMPIInt      size;
209   PetscBool        isBAIJ;
210   DM_DA            *dd = (DM_DA*)da->data;
211 
212   PetscFunctionBegin;
213   /*
214                                   m
215           ------------------------------------------------------
216          |                                                     |
217          |                                                     |
218          |               ----------------------                |
219          |               |                    |                |
220       n  |           yn  |                    |                |
221          |               |                    |                |
222          |               .---------------------                |
223          |             (xs,ys)     xn                          |
224          |            .                                        |
225          |         (gxs,gys)                                   |
226          |                                                     |
227           -----------------------------------------------------
228   */
229 
230   /*
231          nc - number of components per grid point
232          col - number of colors needed in one direction for single component problem
233 
234   */
235   ierr = DMDAGetInfo(da,&dim,NULL,NULL,NULL,&m,&n,&p,&nc,NULL,&bx,&by,&bz,NULL);CHKERRQ(ierr);
236 
237   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
238   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
239   if (ctype == IS_COLORING_LOCAL) {
240     if (size == 1) {
241       ctype = IS_COLORING_GLOBAL;
242     } else if (dim > 1) {
243       if ((m==1 && bx == DM_BOUNDARY_PERIODIC) || (n==1 && by == DM_BOUNDARY_PERIODIC) || (p==1 && bz == DM_BOUNDARY_PERIODIC)) {
244         SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain  on the same process");
245       }
246     }
247   }
248 
249   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
250      matrices is for the blocks, not the individual matrix elements  */
251   ierr = PetscStrbeginswith(da->mattype,MATBAIJ,&isBAIJ);CHKERRQ(ierr);
252   if (!isBAIJ) {ierr = PetscStrbeginswith(da->mattype,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);}
253   if (!isBAIJ) {ierr = PetscStrbeginswith(da->mattype,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);}
254   if (isBAIJ) {
255     dd->w  = 1;
256     dd->xs = dd->xs/nc;
257     dd->xe = dd->xe/nc;
258     dd->Xs = dd->Xs/nc;
259     dd->Xe = dd->Xe/nc;
260   }
261 
262   /*
263      We do not provide a getcoloring function in the DMDA operations because
264    the basic DMDA does not know about matrices. We think of DMDA as being
265    more low-level then matrices.
266   */
267   if (dim == 1) {
268     ierr = DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
269   } else if (dim == 2) {
270     ierr = DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
271   } else if (dim == 3) {
272     ierr = DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
273   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
274   if (isBAIJ) {
275     dd->w  = nc;
276     dd->xs = dd->xs*nc;
277     dd->xe = dd->xe*nc;
278     dd->Xs = dd->Xs*nc;
279     dd->Xe = dd->Xe*nc;
280   }
281   PetscFunctionReturn(0);
282 }
283 
284 /* ---------------------------------------------------------------------------------*/
285 
286 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
287 {
288   PetscErrorCode  ierr;
289   PetscInt        xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
290   PetscInt        ncolors;
291   MPI_Comm        comm;
292   DMBoundaryType  bx,by;
293   DMDAStencilType st;
294   ISColoringValue *colors;
295   DM_DA           *dd = (DM_DA*)da->data;
296 
297   PetscFunctionBegin;
298   /*
299          nc - number of components per grid point
300          col - number of colors needed in one direction for single component problem
301 
302   */
303   ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,&M,&N,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
304   col  = 2*s + 1;
305   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
306   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
307   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
308 
309   /* special case as taught to us by Paul Hovland */
310   if (st == DMDA_STENCIL_STAR && s == 1) {
311     ierr = DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
312   } else {
313     if (ctype == IS_COLORING_GLOBAL) {
314       if (!dd->localcoloring) {
315         ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr);
316         ii   = 0;
317         for (j=ys; j<ys+ny; j++) {
318           for (i=xs; i<xs+nx; i++) {
319             for (k=0; k<nc; k++) {
320               colors[ii++] = k + nc*((i % col) + col*(j % col));
321             }
322           }
323         }
324         ncolors = nc + nc*(col-1 + col*(col-1));
325         ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
326       }
327       *coloring = dd->localcoloring;
328     } else if (ctype == IS_COLORING_LOCAL) {
329       if (!dd->ghostedcoloring) {
330         ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr);
331         ii   = 0;
332         for (j=gys; j<gys+gny; j++) {
333           for (i=gxs; i<gxs+gnx; i++) {
334             for (k=0; k<nc; k++) {
335               /* the complicated stuff is to handle periodic boundaries */
336               colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
337             }
338           }
339         }
340         ncolors = nc + nc*(col - 1 + col*(col-1));
341         ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
342         /* PetscIntView(ncolors,(PetscInt*)colors,0); */
343 
344         ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
345       }
346       *coloring = dd->ghostedcoloring;
347     } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
348   }
349   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
350   PetscFunctionReturn(0);
351 }
352 
353 /* ---------------------------------------------------------------------------------*/
354 
355 PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
356 {
357   PetscErrorCode  ierr;
358   PetscInt        xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
359   PetscInt        ncolors;
360   MPI_Comm        comm;
361   DMBoundaryType  bx,by,bz;
362   DMDAStencilType st;
363   ISColoringValue *colors;
364   DM_DA           *dd = (DM_DA*)da->data;
365 
366   PetscFunctionBegin;
367   /*
368          nc - number of components per grid point
369          col - number of colors needed in one direction for single component problem
370 
371   */
372   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
373   col  = 2*s + 1;
374   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
375   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
376   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
377 
378   /* create the coloring */
379   if (ctype == IS_COLORING_GLOBAL) {
380     if (!dd->localcoloring) {
381       ierr = PetscMalloc1(nc*nx*ny*nz,&colors);CHKERRQ(ierr);
382       ii   = 0;
383       for (k=zs; k<zs+nz; k++) {
384         for (j=ys; j<ys+ny; j++) {
385           for (i=xs; i<xs+nx; i++) {
386             for (l=0; l<nc; l++) {
387               colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
388             }
389           }
390         }
391       }
392       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
393       ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
394     }
395     *coloring = dd->localcoloring;
396   } else if (ctype == IS_COLORING_LOCAL) {
397     if (!dd->ghostedcoloring) {
398       ierr = PetscMalloc1(nc*gnx*gny*gnz,&colors);CHKERRQ(ierr);
399       ii   = 0;
400       for (k=gzs; k<gzs+gnz; k++) {
401         for (j=gys; j<gys+gny; j++) {
402           for (i=gxs; i<gxs+gnx; i++) {
403             for (l=0; l<nc; l++) {
404               /* the complicated stuff is to handle periodic boundaries */
405               colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
406             }
407           }
408         }
409       }
410       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
411       ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
412       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
413     }
414     *coloring = dd->ghostedcoloring;
415   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
416   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
417   PetscFunctionReturn(0);
418 }
419 
420 /* ---------------------------------------------------------------------------------*/
421 
422 PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
423 {
424   PetscErrorCode  ierr;
425   PetscInt        xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
426   PetscInt        ncolors;
427   MPI_Comm        comm;
428   DMBoundaryType  bx;
429   ISColoringValue *colors;
430   DM_DA           *dd = (DM_DA*)da->data;
431 
432   PetscFunctionBegin;
433   /*
434          nc - number of components per grid point
435          col - number of colors needed in one direction for single component problem
436 
437   */
438   ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,&M,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr);
439   col  = 2*s + 1;
440   ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr);
441   ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr);
442   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
443 
444   /* create the coloring */
445   if (ctype == IS_COLORING_GLOBAL) {
446     if (!dd->localcoloring) {
447       ierr = PetscMalloc1(nc*nx,&colors);CHKERRQ(ierr);
448       if (dd->ofillcols) {
449         PetscInt tc = 0;
450         for (i=0; i<nc; i++) tc += (PetscInt) (dd->ofillcols[i] > 0);
451         i1 = 0;
452         for (i=xs; i<xs+nx; i++) {
453           for (l=0; l<nc; l++) {
454             if (dd->ofillcols[l] && (i % col)) {
455               colors[i1++] =  nc - 1 + tc*((i % col) - 1) + dd->ofillcols[l];
456             } else {
457               colors[i1++] = l;
458             }
459           }
460         }
461         ncolors = nc + 2*s*tc;
462       } else {
463         i1 = 0;
464         for (i=xs; i<xs+nx; i++) {
465           for (l=0; l<nc; l++) {
466             colors[i1++] = l + nc*(i % col);
467           }
468         }
469         ncolors = nc + nc*(col-1);
470       }
471       ierr = ISColoringCreate(comm,ncolors,nc*nx,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
472     }
473     *coloring = dd->localcoloring;
474   } else if (ctype == IS_COLORING_LOCAL) {
475     if (!dd->ghostedcoloring) {
476       ierr = PetscMalloc1(nc*gnx,&colors);CHKERRQ(ierr);
477       i1   = 0;
478       for (i=gxs; i<gxs+gnx; i++) {
479         for (l=0; l<nc; l++) {
480           /* the complicated stuff is to handle periodic boundaries */
481           colors[i1++] = l + nc*(SetInRange(i,m) % col);
482         }
483       }
484       ncolors = nc + nc*(col-1);
485       ierr    = ISColoringCreate(comm,ncolors,nc*gnx,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
486       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
487     }
488     *coloring = dd->ghostedcoloring;
489   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
490   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
491   PetscFunctionReturn(0);
492 }
493 
494 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
495 {
496   PetscErrorCode  ierr;
497   PetscInt        xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
498   PetscInt        ncolors;
499   MPI_Comm        comm;
500   DMBoundaryType  bx,by;
501   ISColoringValue *colors;
502   DM_DA           *dd = (DM_DA*)da->data;
503 
504   PetscFunctionBegin;
505   /*
506          nc - number of components per grid point
507          col - number of colors needed in one direction for single component problem
508 
509   */
510   ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,NULL);CHKERRQ(ierr);
511   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
512   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
513   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
514   /* create the coloring */
515   if (ctype == IS_COLORING_GLOBAL) {
516     if (!dd->localcoloring) {
517       ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr);
518       ii   = 0;
519       for (j=ys; j<ys+ny; j++) {
520         for (i=xs; i<xs+nx; i++) {
521           for (k=0; k<nc; k++) {
522             colors[ii++] = k + nc*((3*j+i) % 5);
523           }
524         }
525       }
526       ncolors = 5*nc;
527       ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
528     }
529     *coloring = dd->localcoloring;
530   } else if (ctype == IS_COLORING_LOCAL) {
531     if (!dd->ghostedcoloring) {
532       ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr);
533       ii = 0;
534       for (j=gys; j<gys+gny; j++) {
535         for (i=gxs; i<gxs+gnx; i++) {
536           for (k=0; k<nc; k++) {
537             colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
538           }
539         }
540       }
541       ncolors = 5*nc;
542       ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
543       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
544     }
545     *coloring = dd->ghostedcoloring;
546   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
547   PetscFunctionReturn(0);
548 }
549 
550 /* =========================================================================== */
551 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat,PetscBool);
552 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM,Mat);
553 extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM,Mat,PetscBool);
554 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat,PetscBool);
555 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat);
556 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat,PetscBool);
557 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat);
558 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat);
559 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat);
560 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat);
561 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat);
562 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM,Mat);
563 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM,Mat);
564 extern PetscErrorCode DMCreateMatrix_DA_IS(DM,Mat);
565 
566 /*@C
567    MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix
568 
569    Logically Collective on mat
570 
571    Input Parameters:
572 +  mat - the matrix
573 -  da - the da
574 
575    Level: intermediate
576 
577 @*/
578 PetscErrorCode MatSetupDM(Mat mat,DM da)
579 {
580   PetscErrorCode ierr;
581 
582   PetscFunctionBegin;
583   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
584   PetscValidHeaderSpecificType(da,DM_CLASSID,1,DMDA);
585   ierr = PetscTryMethod(mat,"MatSetupDM_C",(Mat,DM),(mat,da));CHKERRQ(ierr);
586   PetscFunctionReturn(0);
587 }
588 
589 PetscErrorCode  MatView_MPI_DA(Mat A,PetscViewer viewer)
590 {
591   DM                da;
592   PetscErrorCode    ierr;
593   const char        *prefix;
594   Mat               Anatural;
595   AO                ao;
596   PetscInt          rstart,rend,*petsc,i;
597   IS                is;
598   MPI_Comm          comm;
599   PetscViewerFormat format;
600 
601   PetscFunctionBegin;
602   /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
603   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
604   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0);
605 
606   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
607   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
608   if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
609 
610   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
611   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
612   ierr = PetscMalloc1(rend-rstart,&petsc);CHKERRQ(ierr);
613   for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
614   ierr = AOApplicationToPetsc(ao,rend-rstart,petsc);CHKERRQ(ierr);
615   ierr = ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
616 
617   /* call viewer on natural ordering */
618   ierr = MatCreateSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);CHKERRQ(ierr);
619   ierr = ISDestroy(&is);CHKERRQ(ierr);
620   ierr = PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);CHKERRQ(ierr);
621   ierr = PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);CHKERRQ(ierr);
622   ierr = PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);CHKERRQ(ierr);
623   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
624   ierr = MatView(Anatural,viewer);CHKERRQ(ierr);
625   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
626   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
627   PetscFunctionReturn(0);
628 }
629 
630 PetscErrorCode  MatLoad_MPI_DA(Mat A,PetscViewer viewer)
631 {
632   DM             da;
633   PetscErrorCode ierr;
634   Mat            Anatural,Aapp;
635   AO             ao;
636   PetscInt       rstart,rend,*app,i,m,n,M,N;
637   IS             is;
638   MPI_Comm       comm;
639 
640   PetscFunctionBegin;
641   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
642   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
643   if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
644 
645   /* Load the matrix in natural ordering */
646   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Anatural);CHKERRQ(ierr);
647   ierr = MatSetType(Anatural,((PetscObject)A)->type_name);CHKERRQ(ierr);
648   ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr);
649   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
650   ierr = MatSetSizes(Anatural,m,n,M,N);CHKERRQ(ierr);
651   ierr = MatLoad(Anatural,viewer);CHKERRQ(ierr);
652 
653   /* Map natural ordering to application ordering and create IS */
654   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
655   ierr = MatGetOwnershipRange(Anatural,&rstart,&rend);CHKERRQ(ierr);
656   ierr = PetscMalloc1(rend-rstart,&app);CHKERRQ(ierr);
657   for (i=rstart; i<rend; i++) app[i-rstart] = i;
658   ierr = AOPetscToApplication(ao,rend-rstart,app);CHKERRQ(ierr);
659   ierr = ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
660 
661   /* Do permutation and replace header */
662   ierr = MatCreateSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);CHKERRQ(ierr);
663   ierr = MatHeaderReplace(A,&Aapp);CHKERRQ(ierr);
664   ierr = ISDestroy(&is);CHKERRQ(ierr);
665   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
666   PetscFunctionReturn(0);
667 }
668 
669 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
670 {
671   PetscErrorCode ierr;
672   PetscInt       dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
673   Mat            A;
674   MPI_Comm       comm;
675   MatType        Atype;
676   void           (*aij)(void)=NULL,(*baij)(void)=NULL,(*sbaij)(void)=NULL,(*sell)(void)=NULL,(*is)(void)=NULL;
677   MatType        mtype;
678   PetscMPIInt    size;
679   DM_DA          *dd = (DM_DA*)da->data;
680 
681   PetscFunctionBegin;
682   ierr = MatInitializePackage();CHKERRQ(ierr);
683   mtype = da->mattype;
684 
685   /*
686                                   m
687           ------------------------------------------------------
688          |                                                     |
689          |                                                     |
690          |               ----------------------                |
691          |               |                    |                |
692       n  |           ny  |                    |                |
693          |               |                    |                |
694          |               .---------------------                |
695          |             (xs,ys)     nx                          |
696          |            .                                        |
697          |         (gxs,gys)                                   |
698          |                                                     |
699           -----------------------------------------------------
700   */
701 
702   /*
703          nc - number of components per grid point
704          col - number of colors needed in one direction for single component problem
705 
706   */
707   M   = dd->M;
708   N   = dd->N;
709   P   = dd->P;
710   dim = da->dim;
711   dof = dd->w;
712   /* ierr = DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr); */
713   ierr = DMDAGetCorners(da,NULL,NULL,NULL,&nx,&ny,&nz);CHKERRQ(ierr);
714   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
715   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
716   ierr = MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);CHKERRQ(ierr);
717   ierr = MatSetType(A,mtype);CHKERRQ(ierr);
718   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
719   ierr = MatSetDM(A,da);CHKERRQ(ierr);
720   if (da->structure_only) {
721     ierr = MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE);CHKERRQ(ierr);
722   }
723   ierr = MatGetType(A,&Atype);CHKERRQ(ierr);
724   /*
725      We do not provide a getmatrix function in the DMDA operations because
726    the basic DMDA does not know about matrices. We think of DMDA as being more
727    more low-level than matrices. This is kind of cheating but, cause sometimes
728    we think of DMDA has higher level than matrices.
729 
730      We could switch based on Atype (or mtype), but we do not since the
731    specialized setting routines depend only on the particular preallocation
732    details of the matrix, not the type itself.
733   */
734   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
735   if (!aij) {
736     ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
737   }
738   if (!aij) {
739     ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
740     if (!baij) {
741       ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
742     }
743     if (!baij) {
744       ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
745       if (!sbaij) {
746         ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
747       }
748       if (!sbaij) {
749         ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISELLSetPreallocation_C",&sell);CHKERRQ(ierr);
750         if (!sell) {
751           ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",&sell);CHKERRQ(ierr);
752         }
753       }
754       if (!sell) {
755         ierr = PetscObjectQueryFunction((PetscObject)A,"MatISSetPreallocation_C",&is);CHKERRQ(ierr);
756       }
757     }
758   }
759   if (aij) {
760     if (dim == 1) {
761       if (dd->ofill) {
762         ierr = DMCreateMatrix_DA_1d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
763       } else {
764         DMBoundaryType bx;
765         PetscMPIInt  size;
766         ierr = DMDAGetInfo(da,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&bx,NULL,NULL,NULL);CHKERRQ(ierr);
767         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRQ(ierr);
768         if (size == 1 && bx == DM_BOUNDARY_NONE) {
769           ierr = DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da,A,PETSC_FALSE);CHKERRQ(ierr);
770         } else {
771           ierr = DMCreateMatrix_DA_1d_MPIAIJ(da,A,PETSC_FALSE);CHKERRQ(ierr);
772         }
773       }
774     } else if (dim == 2) {
775       if (dd->ofill) {
776         ierr = DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
777       } else {
778         ierr = DMCreateMatrix_DA_2d_MPIAIJ(da,A,PETSC_FALSE);CHKERRQ(ierr);
779       }
780     } else if (dim == 3) {
781       if (dd->ofill) {
782         ierr = DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
783       } else {
784         ierr = DMCreateMatrix_DA_3d_MPIAIJ(da,A,PETSC_FALSE);CHKERRQ(ierr);
785       }
786     }
787   } else if (baij) {
788     if (dim == 2) {
789       ierr = DMCreateMatrix_DA_2d_MPIBAIJ(da,A);CHKERRQ(ierr);
790     } else if (dim == 3) {
791       ierr = DMCreateMatrix_DA_3d_MPIBAIJ(da,A);CHKERRQ(ierr);
792     } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
793   } else if (sbaij) {
794     if (dim == 2) {
795       ierr = DMCreateMatrix_DA_2d_MPISBAIJ(da,A);CHKERRQ(ierr);
796     } else if (dim == 3) {
797       ierr = DMCreateMatrix_DA_3d_MPISBAIJ(da,A);CHKERRQ(ierr);
798     } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
799   } else if (sell) {
800      if (dim == 2) {
801        ierr = DMCreateMatrix_DA_2d_MPISELL(da,A);CHKERRQ(ierr);
802      } else if (dim == 3) {
803        ierr = DMCreateMatrix_DA_3d_MPISELL(da,A);CHKERRQ(ierr);
804      } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
805   } else if (is) {
806     ierr = DMCreateMatrix_DA_IS(da,A);CHKERRQ(ierr);
807   } else {
808     ISLocalToGlobalMapping ltog;
809 
810     ierr = MatSetBlockSize(A,dof);CHKERRQ(ierr);
811     ierr = MatSetUp(A);CHKERRQ(ierr);
812     ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
813     ierr = MatSetLocalToGlobalMapping(A,ltog,ltog);CHKERRQ(ierr);
814   }
815   ierr = DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);CHKERRQ(ierr);
816   ierr = MatSetStencil(A,dim,dims,starts,dof);CHKERRQ(ierr);
817   ierr = MatSetDM(A,da);CHKERRQ(ierr);
818   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
819   if (size > 1) {
820     /* change viewer to display matrix in natural ordering */
821     ierr = MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA);CHKERRQ(ierr);
822     ierr = MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA);CHKERRQ(ierr);
823   }
824   *J = A;
825   PetscFunctionReturn(0);
826 }
827 
828 /* ---------------------------------------------------------------------------------*/
829 PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
830 
831 PetscErrorCode DMCreateMatrix_DA_IS(DM dm,Mat J)
832 {
833   DM_DA                  *da = (DM_DA*)dm->data;
834   Mat                    lJ;
835   ISLocalToGlobalMapping ltog;
836   IS                     is_loc_filt, is_glob;
837   const PetscInt         *e_loc,*idx;
838   PetscInt               nel,nen,nv,dof,dim,*gidx,nb;
839   PetscBool              flg;
840   PetscErrorCode         ierr;
841 
842   /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
843      We need to filter the local indices that are represented through the DMDAGetElements decomposition
844      This is because the size of the local matrices in MATIS is the local size of the l2g map */
845   PetscFunctionBegin;
846   dof  = da->w;
847   dim  = dm->dim;
848 
849   ierr = MatSetBlockSize(J,dof);CHKERRQ(ierr);
850 
851   /* get local elements indices in local DMDA numbering */
852   ierr = DMDAGetElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
853   ierr = ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nel*nen,e_loc,PETSC_COPY_VALUES,&is_loc_filt);CHKERRQ(ierr);
854   ierr = DMDARestoreElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr);
855 
856   /* obtain a consistent local ordering for MATIS */
857   ierr = ISSortRemoveDups(is_loc_filt);CHKERRQ(ierr);
858   ierr = ISBlockGetLocalSize(is_loc_filt,&nb);CHKERRQ(ierr);
859   ierr = DMGetLocalToGlobalMapping(dm,&ltog);CHKERRQ(ierr);
860   ierr = ISLocalToGlobalMappingGetSize(ltog,&nv);CHKERRQ(ierr);
861   ierr = PetscMalloc1(PetscMax(nb,nv/dof),&gidx);CHKERRQ(ierr);
862   ierr = ISBlockGetIndices(is_loc_filt,&idx);CHKERRQ(ierr);
863   ierr = ISLocalToGlobalMappingApplyBlock(ltog,nb,idx,gidx);CHKERRQ(ierr);
864   ierr = ISBlockRestoreIndices(is_loc_filt,&idx);CHKERRQ(ierr);
865   ierr = ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nb,gidx,PETSC_USE_POINTER,&is_glob);CHKERRQ(ierr);
866   ierr = ISLocalToGlobalMappingCreateIS(is_glob,&ltog);CHKERRQ(ierr);
867   ierr = ISDestroy(&is_glob);CHKERRQ(ierr);
868   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
869   ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
870 
871   /* We also attach a l2g map to the local matrices to have MatSetValueLocal to work */
872   ierr = MatISGetLocalMat(J,&lJ);CHKERRQ(ierr);
873   ierr = ISLocalToGlobalMappingCreateIS(is_loc_filt,&ltog);CHKERRQ(ierr);
874   ierr = ISDestroy(&is_loc_filt);CHKERRQ(ierr);
875   ierr = ISCreateStride(PetscObjectComm((PetscObject)lJ),nv/dof,0,1,&is_glob);CHKERRQ(ierr);
876   ierr = ISGetIndices(is_glob,&idx);CHKERRQ(ierr);
877   ierr = ISGlobalToLocalMappingApplyBlock(ltog,IS_GTOLM_MASK,nv/dof,idx,&nb,gidx);CHKERRQ(ierr);
878   ierr = ISRestoreIndices(is_glob,&idx);CHKERRQ(ierr);
879   ierr = ISDestroy(&is_glob);CHKERRQ(ierr);
880   ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
881   ierr = ISCreateBlock(PETSC_COMM_SELF,dof,nb,gidx,PETSC_USE_POINTER,&is_loc_filt);CHKERRQ(ierr);
882   ierr = ISLocalToGlobalMappingCreateIS(is_loc_filt,&ltog);CHKERRQ(ierr);
883   ierr = ISDestroy(&is_loc_filt);CHKERRQ(ierr);
884   ierr = MatSetLocalToGlobalMapping(lJ,ltog,ltog);CHKERRQ(ierr);
885   ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
886   ierr = PetscFree(gidx);CHKERRQ(ierr);
887 
888   /* Preallocation (not exact): we reuse the preallocation routines of the assembled version  */
889   flg = dm->prealloc_only;
890   dm->prealloc_only = PETSC_TRUE;
891   switch (dim) {
892   case 1:
893     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr);
894     ierr = DMCreateMatrix_DA_1d_MPIAIJ(dm,J,PETSC_TRUE);CHKERRQ(ierr);
895     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
896     break;
897   case 2:
898     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr);
899     ierr = DMCreateMatrix_DA_2d_MPIAIJ(dm,J,PETSC_TRUE);CHKERRQ(ierr);
900     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
901     break;
902   case 3:
903     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr);
904     ierr = DMCreateMatrix_DA_3d_MPIAIJ(dm,J,PETSC_TRUE);CHKERRQ(ierr);
905     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
906     break;
907   default:
908     SETERRQ1(PetscObjectComm((PetscObject)dm),PETSC_ERR_SUP,"Unhandled dimension %d",dim);
909   }
910   dm->prealloc_only = flg;
911   PetscFunctionReturn(0);
912 }
913 
914 PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da,Mat J)
915 {
916   PetscErrorCode         ierr;
917   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p;
918   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
919   MPI_Comm               comm;
920   PetscScalar            *values;
921   DMBoundaryType         bx,by;
922   ISLocalToGlobalMapping ltog;
923   DMDAStencilType        st;
924 
925   PetscFunctionBegin;
926   /*
927          nc - number of components per grid point
928          col - number of colors needed in one direction for single component problem
929 
930   */
931   ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
932   col  = 2*s + 1;
933   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
934   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
935   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
936 
937   ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr);
938   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
939 
940   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
941   /* determine the matrix preallocation information */
942   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
943   for (i=xs; i<xs+nx; i++) {
944 
945     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
946     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
947 
948     for (j=ys; j<ys+ny; j++) {
949       slot = i - gxs + gnx*(j - gys);
950 
951       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
952       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
953 
954       cnt = 0;
955       for (k=0; k<nc; k++) {
956         for (l=lstart; l<lend+1; l++) {
957           for (p=pstart; p<pend+1; p++) {
958             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
959               cols[cnt++] = k + nc*(slot + gnx*l + p);
960             }
961           }
962         }
963         rows[k] = k + nc*(slot);
964       }
965       ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
966     }
967   }
968   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
969   ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr);
970   ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
971   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
972 
973   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
974 
975   /*
976     For each node in the grid: we get the neighbors in the local (on processor ordering
977     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
978     PETSc ordering.
979   */
980   if (!da->prealloc_only) {
981     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
982     for (i=xs; i<xs+nx; i++) {
983 
984       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
985       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
986 
987       for (j=ys; j<ys+ny; j++) {
988         slot = i - gxs + gnx*(j - gys);
989 
990         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
991         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
992 
993         cnt = 0;
994         for (k=0; k<nc; k++) {
995           for (l=lstart; l<lend+1; l++) {
996             for (p=pstart; p<pend+1; p++) {
997               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
998                 cols[cnt++] = k + nc*(slot + gnx*l + p);
999               }
1000             }
1001           }
1002           rows[k] = k + nc*(slot);
1003         }
1004         ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1005       }
1006     }
1007     ierr = PetscFree(values);CHKERRQ(ierr);
1008     /* do not copy values to GPU since they are all zero and not yet needed there */
1009     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1010     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1011     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1012     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1013     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1014   }
1015   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1016   PetscFunctionReturn(0);
1017 }
1018 
1019 PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da,Mat J)
1020 {
1021   PetscErrorCode         ierr;
1022   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1023   PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
1024   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
1025   MPI_Comm               comm;
1026   PetscScalar            *values;
1027   DMBoundaryType         bx,by,bz;
1028   ISLocalToGlobalMapping ltog;
1029   DMDAStencilType        st;
1030 
1031   PetscFunctionBegin;
1032   /*
1033          nc - number of components per grid point
1034          col - number of colors needed in one direction for single component problem
1035 
1036   */
1037   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1038   col  = 2*s + 1;
1039   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1040   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1041   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1042 
1043   ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr);
1044   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1045 
1046   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1047   /* determine the matrix preallocation information */
1048   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1049   for (i=xs; i<xs+nx; i++) {
1050     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1051     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1052     for (j=ys; j<ys+ny; j++) {
1053       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1054       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1055       for (k=zs; k<zs+nz; k++) {
1056         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1057         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1058 
1059         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1060 
1061         cnt = 0;
1062         for (l=0; l<nc; l++) {
1063           for (ii=istart; ii<iend+1; ii++) {
1064             for (jj=jstart; jj<jend+1; jj++) {
1065               for (kk=kstart; kk<kend+1; kk++) {
1066                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1067                   cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1068                 }
1069               }
1070             }
1071           }
1072           rows[l] = l + nc*(slot);
1073         }
1074         ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1075       }
1076     }
1077   }
1078   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1079   ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1080   ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1081   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1082   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1083 
1084   /*
1085     For each node in the grid: we get the neighbors in the local (on processor ordering
1086     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1087     PETSc ordering.
1088   */
1089   if (!da->prealloc_only) {
1090     ierr = PetscCalloc1(col*col*col*nc*nc*nc,&values);CHKERRQ(ierr);
1091     for (i=xs; i<xs+nx; i++) {
1092       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1093       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1094       for (j=ys; j<ys+ny; j++) {
1095         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1096         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1097         for (k=zs; k<zs+nz; k++) {
1098           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1099           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1100 
1101           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1102 
1103           cnt = 0;
1104           for (l=0; l<nc; l++) {
1105             for (ii=istart; ii<iend+1; ii++) {
1106               for (jj=jstart; jj<jend+1; jj++) {
1107                 for (kk=kstart; kk<kend+1; kk++) {
1108                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1109                     cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1110                   }
1111                 }
1112               }
1113             }
1114             rows[l] = l + nc*(slot);
1115           }
1116           ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1117         }
1118       }
1119     }
1120     ierr = PetscFree(values);CHKERRQ(ierr);
1121     /* do not copy values to GPU since they are all zero and not yet needed there */
1122     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1123     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1124     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1125     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1126     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1127   }
1128   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1129   PetscFunctionReturn(0);
1130 }
1131 
1132 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J,PetscBool isIS)
1133 {
1134   PetscErrorCode         ierr;
1135   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,M,N;
1136   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
1137   MPI_Comm               comm;
1138   DMBoundaryType         bx,by;
1139   ISLocalToGlobalMapping ltog,mltog;
1140   DMDAStencilType        st;
1141   PetscBool              removedups = PETSC_FALSE;
1142 
1143   PetscFunctionBegin;
1144   /*
1145          nc - number of components per grid point
1146          col - number of colors needed in one direction for single component problem
1147 
1148   */
1149   ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
1150   if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) {
1151     ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr);
1152   }
1153   col  = 2*s + 1;
1154   /*
1155        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1156        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1157   */
1158   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1159   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1160   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
1161   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
1162   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1163 
1164   ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr);
1165   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1166 
1167   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1168   /* determine the matrix preallocation information */
1169   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
1170   for (i=xs; i<xs+nx; i++) {
1171 
1172     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1173     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1174 
1175     for (j=ys; j<ys+ny; j++) {
1176       slot = i - gxs + gnx*(j - gys);
1177 
1178       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1179       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1180 
1181       cnt = 0;
1182       for (k=0; k<nc; k++) {
1183         for (l=lstart; l<lend+1; l++) {
1184           for (p=pstart; p<pend+1; p++) {
1185             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1186               cols[cnt++] = k + nc*(slot + gnx*l + p);
1187             }
1188           }
1189         }
1190         rows[k] = k + nc*(slot);
1191       }
1192       if (removedups) {
1193         ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1194       } else {
1195         ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1196       }
1197     }
1198   }
1199   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1200   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1201   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1202   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1203   ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
1204   if (!mltog) {
1205     ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1206   }
1207 
1208   /*
1209     For each node in the grid: we get the neighbors in the local (on processor ordering
1210     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1211     PETSc ordering.
1212   */
1213   if (!da->prealloc_only) {
1214     for (i=xs; i<xs+nx; i++) {
1215 
1216       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1217       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1218 
1219       for (j=ys; j<ys+ny; j++) {
1220         slot = i - gxs + gnx*(j - gys);
1221 
1222         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1223         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1224 
1225         cnt = 0;
1226         for (l=lstart; l<lend+1; l++) {
1227           for (p=pstart; p<pend+1; p++) {
1228             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1229               cols[cnt++] = nc*(slot + gnx*l + p);
1230               for (k=1; k<nc; k++) {
1231                 cols[cnt] = 1 + cols[cnt-1];cnt++;
1232               }
1233             }
1234           }
1235         }
1236         for (k=0; k<nc; k++) rows[k] = k + nc*(slot);
1237         ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1238       }
1239     }
1240     /* do not copy values to GPU since they are all zero and not yet needed there */
1241     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1242     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1243     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1244     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1245     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1246     if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) {
1247       ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
1248     }
1249   }
1250   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1251   PetscFunctionReturn(0);
1252 }
1253 
1254 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)
1255 {
1256   PetscErrorCode         ierr;
1257   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1258   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,maxcnt = 0,l,p,M,N;
1259   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
1260   DM_DA                  *dd = (DM_DA*)da->data;
1261   PetscInt               ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
1262   MPI_Comm               comm;
1263   DMBoundaryType         bx,by;
1264   ISLocalToGlobalMapping ltog;
1265   DMDAStencilType        st;
1266   PetscBool              removedups = PETSC_FALSE;
1267 
1268   PetscFunctionBegin;
1269   /*
1270          nc - number of components per grid point
1271          col - number of colors needed in one direction for single component problem
1272 
1273   */
1274   ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
1275   col  = 2*s + 1;
1276   /*
1277        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1278        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1279   */
1280   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1281   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1282   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
1283   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
1284   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1285 
1286   ierr = PetscMalloc1(col*col*nc,&cols);CHKERRQ(ierr);
1287   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1288 
1289   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1290   /* determine the matrix preallocation information */
1291   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
1292   for (i=xs; i<xs+nx; i++) {
1293 
1294     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1295     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1296 
1297     for (j=ys; j<ys+ny; j++) {
1298       slot = i - gxs + gnx*(j - gys);
1299 
1300       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1301       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1302 
1303       for (k=0; k<nc; k++) {
1304         cnt = 0;
1305         for (l=lstart; l<lend+1; l++) {
1306           for (p=pstart; p<pend+1; p++) {
1307             if (l || p) {
1308               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
1309                 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
1310               }
1311             } else {
1312               if (dfill) {
1313                 for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1314               } else {
1315                 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1316               }
1317             }
1318           }
1319         }
1320         row    = k + nc*(slot);
1321         maxcnt = PetscMax(maxcnt,cnt);
1322         if (removedups) {
1323           ierr   = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1324         } else {
1325           ierr   = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1326         }
1327       }
1328     }
1329   }
1330   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1331   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1332   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1333   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1334 
1335   /*
1336     For each node in the grid: we get the neighbors in the local (on processor ordering
1337     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1338     PETSc ordering.
1339   */
1340   if (!da->prealloc_only) {
1341     for (i=xs; i<xs+nx; i++) {
1342 
1343       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1344       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1345 
1346       for (j=ys; j<ys+ny; j++) {
1347         slot = i - gxs + gnx*(j - gys);
1348 
1349         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1350         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1351 
1352         for (k=0; k<nc; k++) {
1353           cnt = 0;
1354           for (l=lstart; l<lend+1; l++) {
1355             for (p=pstart; p<pend+1; p++) {
1356               if (l || p) {
1357                 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
1358                   for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
1359                 }
1360               } else {
1361                 if (dfill) {
1362                   for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1363                 } else {
1364                   for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1365                 }
1366               }
1367             }
1368           }
1369           row  = k + nc*(slot);
1370           ierr = MatSetValuesLocal(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1371         }
1372       }
1373     }
1374     /* do not copy values to GPU since they are all zero and not yet needed there */
1375     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1376     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1377     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1378     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1379     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1380   }
1381   ierr = PetscFree(cols);CHKERRQ(ierr);
1382   PetscFunctionReturn(0);
1383 }
1384 
1385 /* ---------------------------------------------------------------------------------*/
1386 
1387 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J,PetscBool isIS)
1388 {
1389   PetscErrorCode         ierr;
1390   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1391   PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
1392   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
1393   MPI_Comm               comm;
1394   DMBoundaryType         bx,by,bz;
1395   ISLocalToGlobalMapping ltog,mltog;
1396   DMDAStencilType        st;
1397   PetscBool              removedups = PETSC_FALSE;
1398 
1399   PetscFunctionBegin;
1400   /*
1401          nc - number of components per grid point
1402          col - number of colors needed in one direction for single component problem
1403 
1404   */
1405   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1406   if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) {
1407     ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr);
1408   }
1409   col  = 2*s + 1;
1410 
1411   /*
1412        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1413        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1414   */
1415   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1416   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1417   if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;
1418 
1419   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1420   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1421   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1422 
1423   ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr);
1424   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1425 
1426   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1427   /* determine the matrix preallocation information */
1428   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1429   for (i=xs; i<xs+nx; i++) {
1430     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1431     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1432     for (j=ys; j<ys+ny; j++) {
1433       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1434       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1435       for (k=zs; k<zs+nz; k++) {
1436         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1437         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1438 
1439         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1440 
1441         cnt = 0;
1442         for (l=0; l<nc; l++) {
1443           for (ii=istart; ii<iend+1; ii++) {
1444             for (jj=jstart; jj<jend+1; jj++) {
1445               for (kk=kstart; kk<kend+1; kk++) {
1446                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1447                   cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1448                 }
1449               }
1450             }
1451           }
1452           rows[l] = l + nc*(slot);
1453         }
1454         if (removedups) {
1455           ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1456         } else {
1457           ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1458         }
1459       }
1460     }
1461   }
1462   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1463   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1464   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1465   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1466   ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
1467   if (!mltog) {
1468     ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1469   }
1470 
1471   /*
1472     For each node in the grid: we get the neighbors in the local (on processor ordering
1473     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1474     PETSc ordering.
1475   */
1476   if (!da->prealloc_only) {
1477     for (i=xs; i<xs+nx; i++) {
1478       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1479       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1480       for (j=ys; j<ys+ny; j++) {
1481         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1482         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1483         for (k=zs; k<zs+nz; k++) {
1484           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1485           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1486 
1487           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1488 
1489           cnt = 0;
1490           for (kk=kstart; kk<kend+1; kk++) {
1491             for (jj=jstart; jj<jend+1; jj++) {
1492               for (ii=istart; ii<iend+1; ii++) {
1493                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1494                   cols[cnt++] = nc*(slot + ii + gnx*jj + gnx*gny*kk);
1495                     for (l=1; l<nc; l++) {
1496                       cols[cnt] = 1 + cols[cnt-1];cnt++;
1497                   }
1498                 }
1499               }
1500             }
1501           }
1502           rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
1503           ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1504         }
1505       }
1506     }
1507     /* do not copy values to GPU since they are all zero and not yet needed there */
1508     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1509     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1510     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1511     if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) {
1512       ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
1513     }
1514     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1515     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1516   }
1517   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1518   PetscFunctionReturn(0);
1519 }
1520 
1521 /* ---------------------------------------------------------------------------------*/
1522 
1523 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J)
1524 {
1525   PetscErrorCode         ierr;
1526   DM_DA                  *dd = (DM_DA*)da->data;
1527   PetscInt               xs,nx,i,j,gxs,gnx,row,k,l;
1528   PetscInt               m,dim,s,*cols = NULL,nc,cnt,maxcnt = 0,*ocols;
1529   PetscInt               *ofill = dd->ofill,*dfill = dd->dfill;
1530   DMBoundaryType         bx;
1531   ISLocalToGlobalMapping ltog;
1532   PetscMPIInt            rank,size;
1533 
1534   PetscFunctionBegin;
1535   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)da),&rank);CHKERRQ(ierr);
1536   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRQ(ierr);
1537 
1538   /*
1539          nc - number of components per grid point
1540 
1541   */
1542   ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr);
1543   if (s > 1) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Matrix creation for 1d not implemented correctly for stencil width larger than 1");
1544   ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr);
1545   ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr);
1546 
1547   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1548   ierr = PetscCalloc2(nx*nc,&cols,nx*nc,&ocols);CHKERRQ(ierr);
1549 
1550   /*
1551         note should be smaller for first and last process with no periodic
1552         does not handle dfill
1553   */
1554   cnt = 0;
1555   /* coupling with process to the left */
1556   for (i=0; i<s; i++) {
1557     for (j=0; j<nc; j++) {
1558       ocols[cnt] = ((!rank) ? 0 : (s - i)*(ofill[j+1] - ofill[j]));
1559       cols[cnt]  = dfill[j+1] - dfill[j] + (s + i)*(ofill[j+1] - ofill[j]);
1560       if (!rank && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1561         if (size > 1) ocols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
1562         else cols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
1563       }
1564       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1565       cnt++;
1566     }
1567   }
1568   for (i=s; i<nx-s; i++) {
1569     for (j=0; j<nc; j++) {
1570       cols[cnt] = dfill[j+1] - dfill[j] + 2*s*(ofill[j+1] - ofill[j]);
1571       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1572       cnt++;
1573     }
1574   }
1575   /* coupling with process to the right */
1576   for (i=nx-s; i<nx; i++) {
1577     for (j=0; j<nc; j++) {
1578       ocols[cnt] = ((rank == (size-1)) ? 0 : (i - nx + s + 1)*(ofill[j+1] - ofill[j]));
1579       cols[cnt]  = dfill[j+1] - dfill[j] + (s + nx - i - 1)*(ofill[j+1] - ofill[j]);
1580       if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1581         if (size > 1) ocols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
1582         else cols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
1583       }
1584       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1585       cnt++;
1586     }
1587   }
1588 
1589   ierr = MatSeqAIJSetPreallocation(J,0,cols);CHKERRQ(ierr);
1590   ierr = MatMPIAIJSetPreallocation(J,0,cols,0,ocols);CHKERRQ(ierr);
1591   ierr = PetscFree2(cols,ocols);CHKERRQ(ierr);
1592 
1593   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1594   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1595 
1596   /*
1597     For each node in the grid: we get the neighbors in the local (on processor ordering
1598     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1599     PETSc ordering.
1600   */
1601   if (!da->prealloc_only) {
1602     ierr = PetscMalloc1(maxcnt,&cols);CHKERRQ(ierr);
1603     row = xs*nc;
1604     /* coupling with process to the left */
1605     for (i=xs; i<xs+s; i++) {
1606       for (j=0; j<nc; j++) {
1607         cnt = 0;
1608         if (rank) {
1609           for (l=0; l<s; l++) {
1610             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1611           }
1612         }
1613         if (!rank && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1614           for (l=0; l<s; l++) {
1615             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (m + i - s - l)*nc + ofill[k];
1616           }
1617         }
1618         if (dfill) {
1619           for (k=dfill[j]; k<dfill[j+1]; k++) {
1620             cols[cnt++] = i*nc + dfill[k];
1621           }
1622         } else {
1623           for (k=0; k<nc; k++) {
1624             cols[cnt++] = i*nc + k;
1625           }
1626         }
1627         for (l=0; l<s; l++) {
1628           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1629         }
1630         ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1631         row++;
1632       }
1633     }
1634     for (i=xs+s; i<xs+nx-s; i++) {
1635       for (j=0; j<nc; j++) {
1636         cnt = 0;
1637         for (l=0; l<s; l++) {
1638           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1639         }
1640         if (dfill) {
1641           for (k=dfill[j]; k<dfill[j+1]; k++) {
1642             cols[cnt++] = i*nc + dfill[k];
1643           }
1644         } else {
1645           for (k=0; k<nc; k++) {
1646             cols[cnt++] = i*nc + k;
1647           }
1648         }
1649         for (l=0; l<s; l++) {
1650           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1651         }
1652         ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1653         row++;
1654       }
1655     }
1656     /* coupling with process to the right */
1657     for (i=xs+nx-s; i<xs+nx; i++) {
1658       for (j=0; j<nc; j++) {
1659         cnt = 0;
1660         for (l=0; l<s; l++) {
1661           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1662         }
1663         if (dfill) {
1664           for (k=dfill[j]; k<dfill[j+1]; k++) {
1665             cols[cnt++] = i*nc + dfill[k];
1666           }
1667         } else {
1668           for (k=0; k<nc; k++) {
1669             cols[cnt++] = i*nc + k;
1670           }
1671         }
1672         if (rank < size-1) {
1673           for (l=0; l<s; l++) {
1674             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1675           }
1676         }
1677         if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1678           for (l=0; l<s; l++) {
1679             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s - l - m + 2)*nc + ofill[k];
1680           }
1681         }
1682         ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1683         row++;
1684       }
1685     }
1686     ierr = PetscFree(cols);CHKERRQ(ierr);
1687     /* do not copy values to GPU since they are all zero and not yet needed there */
1688     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1689     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1690     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1691     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1692     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1693   }
1694   PetscFunctionReturn(0);
1695 }
1696 
1697 /* ---------------------------------------------------------------------------------*/
1698 
1699 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J,PetscBool isIS)
1700 {
1701   PetscErrorCode         ierr;
1702   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1703   PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
1704   PetscInt               istart,iend;
1705   DMBoundaryType         bx;
1706   ISLocalToGlobalMapping ltog,mltog;
1707 
1708   PetscFunctionBegin;
1709   /*
1710          nc - number of components per grid point
1711          col - number of colors needed in one direction for single component problem
1712 
1713   */
1714   ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr);
1715   if (!isIS && bx == DM_BOUNDARY_NONE) {
1716     ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr);
1717   }
1718   col  = 2*s + 1;
1719 
1720   ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr);
1721   ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr);
1722 
1723   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1724   ierr = MatSeqAIJSetPreallocation(J,col*nc,NULL);CHKERRQ(ierr);
1725   ierr = MatMPIAIJSetPreallocation(J,col*nc,NULL,col*nc,NULL);CHKERRQ(ierr);
1726 
1727   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1728   ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
1729   if (!mltog) {
1730     ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1731   }
1732 
1733   /*
1734     For each node in the grid: we get the neighbors in the local (on processor ordering
1735     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1736     PETSc ordering.
1737   */
1738   if (!da->prealloc_only) {
1739     ierr = PetscMalloc2(nc,&rows,col*nc*nc,&cols);CHKERRQ(ierr);
1740     for (i=xs; i<xs+nx; i++) {
1741       istart = PetscMax(-s,gxs - i);
1742       iend   = PetscMin(s,gxs + gnx - i - 1);
1743       slot   = i - gxs;
1744 
1745       cnt = 0;
1746       for (i1=istart; i1<iend+1; i1++) {
1747         cols[cnt++] = nc*(slot + i1);
1748         for (l=1; l<nc; l++) {
1749           cols[cnt] = 1 + cols[cnt-1];cnt++;
1750         }
1751       }
1752       rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
1753       ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1754     }
1755     /* do not copy values to GPU since they are all zero and not yet needed there */
1756     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1757     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1758     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1759     if (!isIS && bx == DM_BOUNDARY_NONE) {
1760       ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
1761     }
1762     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1763     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1764     ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1765   }
1766   PetscFunctionReturn(0);
1767 }
1768 
1769 /* ---------------------------------------------------------------------------------*/
1770 
1771 PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da,Mat J,PetscBool isIS)
1772 {
1773   PetscErrorCode         ierr;
1774   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1775   PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
1776   PetscInt               istart,iend;
1777   DMBoundaryType         bx;
1778   ISLocalToGlobalMapping ltog,mltog;
1779 
1780   PetscFunctionBegin;
1781   /*
1782          nc - number of components per grid point
1783          col - number of colors needed in one direction for single component problem
1784   */
1785   ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr);
1786   col  = 2*s + 1;
1787 
1788   ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr);
1789   ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr);
1790 
1791   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1792   ierr = MatSeqAIJSetTotalPreallocation(J,nx*nc*col*nc);CHKERRQ(ierr);
1793 
1794   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1795   ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
1796   if (!mltog) {
1797     ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1798   }
1799 
1800   /*
1801     For each node in the grid: we get the neighbors in the local (on processor ordering
1802     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1803     PETSc ordering.
1804   */
1805   if (!da->prealloc_only) {
1806     ierr = PetscMalloc2(nc,&rows,col*nc*nc,&cols);CHKERRQ(ierr);
1807     for (i=xs; i<xs+nx; i++) {
1808       istart = PetscMax(-s,gxs - i);
1809       iend   = PetscMin(s,gxs + gnx - i - 1);
1810       slot   = i - gxs;
1811 
1812       cnt = 0;
1813       for (i1=istart; i1<iend+1; i1++) {
1814         cols[cnt++] = nc*(slot + i1);
1815         for (l=1; l<nc; l++) {
1816           cols[cnt] = 1 + cols[cnt-1];cnt++;
1817         }
1818       }
1819       rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
1820       ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1821     }
1822     /* do not copy values to GPU since they are all zero and not yet needed there */
1823     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1824     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1825     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1826     if (!isIS && bx == DM_BOUNDARY_NONE) {
1827       ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
1828     }
1829     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1830     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1831     ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1832   }
1833   ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
1834   PetscFunctionReturn(0);
1835 }
1836 
1837 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
1838 {
1839   PetscErrorCode         ierr;
1840   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1841   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1842   PetscInt               istart,iend,jstart,jend,ii,jj;
1843   MPI_Comm               comm;
1844   PetscScalar            *values;
1845   DMBoundaryType         bx,by;
1846   DMDAStencilType        st;
1847   ISLocalToGlobalMapping ltog;
1848 
1849   PetscFunctionBegin;
1850   /*
1851      nc - number of components per grid point
1852      col - number of colors needed in one direction for single component problem
1853   */
1854   ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
1855   col  = 2*s + 1;
1856 
1857   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
1858   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
1859   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1860 
1861   ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr);
1862 
1863   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1864 
1865   /* determine the matrix preallocation information */
1866   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1867   for (i=xs; i<xs+nx; i++) {
1868     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1869     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1870     for (j=ys; j<ys+ny; j++) {
1871       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1872       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1873       slot   = i - gxs + gnx*(j - gys);
1874 
1875       /* Find block columns in block row */
1876       cnt = 0;
1877       for (ii=istart; ii<iend+1; ii++) {
1878         for (jj=jstart; jj<jend+1; jj++) {
1879           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1880             cols[cnt++] = slot + ii + gnx*jj;
1881           }
1882         }
1883       }
1884       ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1885     }
1886   }
1887   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1888   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1889   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1890 
1891   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1892 
1893   /*
1894     For each node in the grid: we get the neighbors in the local (on processor ordering
1895     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1896     PETSc ordering.
1897   */
1898   if (!da->prealloc_only) {
1899     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
1900     for (i=xs; i<xs+nx; i++) {
1901       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1902       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1903       for (j=ys; j<ys+ny; j++) {
1904         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1905         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1906         slot = i - gxs + gnx*(j - gys);
1907         cnt  = 0;
1908         for (ii=istart; ii<iend+1; ii++) {
1909           for (jj=jstart; jj<jend+1; jj++) {
1910             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1911               cols[cnt++] = slot + ii + gnx*jj;
1912             }
1913           }
1914         }
1915         ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1916       }
1917     }
1918     ierr = PetscFree(values);CHKERRQ(ierr);
1919     /* do not copy values to GPU since they are all zero and not yet needed there */
1920     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1921     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1922     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1923     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1924     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1925   }
1926   ierr = PetscFree(cols);CHKERRQ(ierr);
1927   PetscFunctionReturn(0);
1928 }
1929 
1930 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
1931 {
1932   PetscErrorCode         ierr;
1933   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1934   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1935   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1936   MPI_Comm               comm;
1937   PetscScalar            *values;
1938   DMBoundaryType         bx,by,bz;
1939   DMDAStencilType        st;
1940   ISLocalToGlobalMapping ltog;
1941 
1942   PetscFunctionBegin;
1943   /*
1944          nc - number of components per grid point
1945          col - number of colors needed in one direction for single component problem
1946 
1947   */
1948   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1949   col  = 2*s + 1;
1950 
1951   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1952   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1953   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1954 
1955   ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr);
1956 
1957   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1958 
1959   /* determine the matrix preallocation information */
1960   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1961   for (i=xs; i<xs+nx; i++) {
1962     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1963     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1964     for (j=ys; j<ys+ny; j++) {
1965       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1966       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1967       for (k=zs; k<zs+nz; k++) {
1968         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1969         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1970 
1971         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1972 
1973         /* Find block columns in block row */
1974         cnt = 0;
1975         for (ii=istart; ii<iend+1; ii++) {
1976           for (jj=jstart; jj<jend+1; jj++) {
1977             for (kk=kstart; kk<kend+1; kk++) {
1978               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1979                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1980               }
1981             }
1982           }
1983         }
1984         ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1985       }
1986     }
1987   }
1988   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1989   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1990   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1991 
1992   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1993 
1994   /*
1995     For each node in the grid: we get the neighbors in the local (on processor ordering
1996     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1997     PETSc ordering.
1998   */
1999   if (!da->prealloc_only) {
2000     ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr);
2001     for (i=xs; i<xs+nx; i++) {
2002       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2003       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2004       for (j=ys; j<ys+ny; j++) {
2005         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2006         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2007         for (k=zs; k<zs+nz; k++) {
2008           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2009           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2010 
2011           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2012 
2013           cnt = 0;
2014           for (ii=istart; ii<iend+1; ii++) {
2015             for (jj=jstart; jj<jend+1; jj++) {
2016               for (kk=kstart; kk<kend+1; kk++) {
2017                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2018                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
2019                 }
2020               }
2021             }
2022           }
2023           ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2024         }
2025       }
2026     }
2027     ierr = PetscFree(values);CHKERRQ(ierr);
2028     /* do not copy values to GPU since they are all zero and not yet needed there */
2029     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
2030     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2031     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2032     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
2033     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2034   }
2035   ierr = PetscFree(cols);CHKERRQ(ierr);
2036   PetscFunctionReturn(0);
2037 }
2038 
2039 /*
2040   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
2041   identify in the local ordering with periodic domain.
2042 */
2043 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[])
2044 {
2045   PetscErrorCode ierr;
2046   PetscInt       i,n;
2047 
2048   PetscFunctionBegin;
2049   ierr = ISLocalToGlobalMappingApplyBlock(ltog,1,row,row);CHKERRQ(ierr);
2050   ierr = ISLocalToGlobalMappingApplyBlock(ltog,*cnt,col,col);CHKERRQ(ierr);
2051   for (i=0,n=0; i<*cnt; i++) {
2052     if (col[i] >= *row) col[n++] = col[i];
2053   }
2054   *cnt = n;
2055   PetscFunctionReturn(0);
2056 }
2057 
2058 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J)
2059 {
2060   PetscErrorCode         ierr;
2061   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2062   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
2063   PetscInt               istart,iend,jstart,jend,ii,jj;
2064   MPI_Comm               comm;
2065   PetscScalar            *values;
2066   DMBoundaryType         bx,by;
2067   DMDAStencilType        st;
2068   ISLocalToGlobalMapping ltog;
2069 
2070   PetscFunctionBegin;
2071   /*
2072      nc - number of components per grid point
2073      col - number of colors needed in one direction for single component problem
2074   */
2075   ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
2076   col  = 2*s + 1;
2077 
2078   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
2079   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
2080   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
2081 
2082   ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr);
2083 
2084   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
2085 
2086   /* determine the matrix preallocation information */
2087   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
2088   for (i=xs; i<xs+nx; i++) {
2089     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2090     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2091     for (j=ys; j<ys+ny; j++) {
2092       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2093       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2094       slot   = i - gxs + gnx*(j - gys);
2095 
2096       /* Find block columns in block row */
2097       cnt = 0;
2098       for (ii=istart; ii<iend+1; ii++) {
2099         for (jj=jstart; jj<jend+1; jj++) {
2100           if (st == DMDA_STENCIL_BOX || !ii || !jj) {
2101             cols[cnt++] = slot + ii + gnx*jj;
2102           }
2103         }
2104       }
2105       ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2106       ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
2107     }
2108   }
2109   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
2110   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
2111   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2112 
2113   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
2114 
2115   /*
2116     For each node in the grid: we get the neighbors in the local (on processor ordering
2117     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2118     PETSc ordering.
2119   */
2120   if (!da->prealloc_only) {
2121     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
2122     for (i=xs; i<xs+nx; i++) {
2123       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2124       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2125       for (j=ys; j<ys+ny; j++) {
2126         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2127         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2128         slot   = i - gxs + gnx*(j - gys);
2129 
2130         /* Find block columns in block row */
2131         cnt = 0;
2132         for (ii=istart; ii<iend+1; ii++) {
2133           for (jj=jstart; jj<jend+1; jj++) {
2134             if (st == DMDA_STENCIL_BOX || !ii || !jj) {
2135               cols[cnt++] = slot + ii + gnx*jj;
2136             }
2137           }
2138         }
2139         ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2140         ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2141       }
2142     }
2143     ierr = PetscFree(values);CHKERRQ(ierr);
2144     /* do not copy values to GPU since they are all zero and not yet needed there */
2145     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
2146     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2147     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2148     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
2149     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2150   }
2151   ierr = PetscFree(cols);CHKERRQ(ierr);
2152   PetscFunctionReturn(0);
2153 }
2154 
2155 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J)
2156 {
2157   PetscErrorCode         ierr;
2158   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2159   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
2160   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
2161   MPI_Comm               comm;
2162   PetscScalar            *values;
2163   DMBoundaryType         bx,by,bz;
2164   DMDAStencilType        st;
2165   ISLocalToGlobalMapping ltog;
2166 
2167   PetscFunctionBegin;
2168   /*
2169      nc - number of components per grid point
2170      col - number of colors needed in one direction for single component problem
2171   */
2172   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
2173   col  = 2*s + 1;
2174 
2175   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
2176   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
2177   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
2178 
2179   /* create the matrix */
2180   ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr);
2181 
2182   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
2183 
2184   /* determine the matrix preallocation information */
2185   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
2186   for (i=xs; i<xs+nx; i++) {
2187     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2188     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2189     for (j=ys; j<ys+ny; j++) {
2190       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2191       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2192       for (k=zs; k<zs+nz; k++) {
2193         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2194         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2195 
2196         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2197 
2198         /* Find block columns in block row */
2199         cnt = 0;
2200         for (ii=istart; ii<iend+1; ii++) {
2201           for (jj=jstart; jj<jend+1; jj++) {
2202             for (kk=kstart; kk<kend+1; kk++) {
2203               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
2204                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
2205               }
2206             }
2207           }
2208         }
2209         ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2210         ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
2211       }
2212     }
2213   }
2214   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
2215   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
2216   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2217 
2218   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
2219 
2220   /*
2221     For each node in the grid: we get the neighbors in the local (on processor ordering
2222     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2223     PETSc ordering.
2224   */
2225   if (!da->prealloc_only) {
2226     ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr);
2227     for (i=xs; i<xs+nx; i++) {
2228       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2229       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2230       for (j=ys; j<ys+ny; j++) {
2231         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2232         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2233         for (k=zs; k<zs+nz; k++) {
2234           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2235           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2236 
2237           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2238 
2239           cnt = 0;
2240           for (ii=istart; ii<iend+1; ii++) {
2241             for (jj=jstart; jj<jend+1; jj++) {
2242               for (kk=kstart; kk<kend+1; kk++) {
2243                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
2244                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
2245                 }
2246               }
2247             }
2248           }
2249           ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2250           ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2251         }
2252       }
2253     }
2254     ierr = PetscFree(values);CHKERRQ(ierr);
2255     /* do not copy values to GPU since they are all zero and not yet needed there */
2256     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
2257     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2258     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2259     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
2260     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2261   }
2262   ierr = PetscFree(cols);CHKERRQ(ierr);
2263   PetscFunctionReturn(0);
2264 }
2265 
2266 /* ---------------------------------------------------------------------------------*/
2267 
2268 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J)
2269 {
2270   PetscErrorCode         ierr;
2271   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2272   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt, maxcnt = 0,l,p,*dnz,*onz;
2273   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
2274   DM_DA                  *dd = (DM_DA*)da->data;
2275   PetscInt               ifill_col,*dfill = dd->dfill,*ofill = dd->ofill;
2276   MPI_Comm               comm;
2277   PetscScalar            *values;
2278   DMBoundaryType         bx,by,bz;
2279   ISLocalToGlobalMapping ltog;
2280   DMDAStencilType        st;
2281   PetscBool              removedups = PETSC_FALSE;
2282 
2283   PetscFunctionBegin;
2284   /*
2285          nc - number of components per grid point
2286          col - number of colors needed in one direction for single component problem
2287 
2288   */
2289   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
2290   col  = 2*s + 1;
2291   if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
2292                  by 2*stencil_width + 1\n");
2293   if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
2294                  by 2*stencil_width + 1\n");
2295   if (bz == DM_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
2296                  by 2*stencil_width + 1\n");
2297 
2298   /*
2299        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
2300        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
2301   */
2302   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
2303   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
2304   if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;
2305 
2306   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
2307   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
2308   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
2309 
2310   ierr = PetscMalloc1(col*col*col*nc,&cols);CHKERRQ(ierr);
2311   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
2312 
2313   /* determine the matrix preallocation information */
2314   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
2315 
2316   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
2317   for (i=xs; i<xs+nx; i++) {
2318     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2319     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2320     for (j=ys; j<ys+ny; j++) {
2321       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2322       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2323       for (k=zs; k<zs+nz; k++) {
2324         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2325         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2326 
2327         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2328 
2329         for (l=0; l<nc; l++) {
2330           cnt = 0;
2331           for (ii=istart; ii<iend+1; ii++) {
2332             for (jj=jstart; jj<jend+1; jj++) {
2333               for (kk=kstart; kk<kend+1; kk++) {
2334                 if (ii || jj || kk) {
2335                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2336                     for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2337                   }
2338                 } else {
2339                   if (dfill) {
2340                     for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2341                   } else {
2342                     for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2343                   }
2344                 }
2345               }
2346             }
2347           }
2348           row  = l + nc*(slot);
2349           maxcnt = PetscMax(maxcnt,cnt);
2350           if (removedups) {
2351             ierr = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
2352           } else {
2353             ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
2354           }
2355         }
2356       }
2357     }
2358   }
2359   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
2360   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
2361   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2362   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
2363 
2364   /*
2365     For each node in the grid: we get the neighbors in the local (on processor ordering
2366     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2367     PETSc ordering.
2368   */
2369   if (!da->prealloc_only) {
2370     ierr = PetscCalloc1(maxcnt,&values);CHKERRQ(ierr);
2371     for (i=xs; i<xs+nx; i++) {
2372       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2373       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2374       for (j=ys; j<ys+ny; j++) {
2375         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2376         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2377         for (k=zs; k<zs+nz; k++) {
2378           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2379           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2380 
2381           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2382 
2383           for (l=0; l<nc; l++) {
2384             cnt = 0;
2385             for (ii=istart; ii<iend+1; ii++) {
2386               for (jj=jstart; jj<jend+1; jj++) {
2387                 for (kk=kstart; kk<kend+1; kk++) {
2388                   if (ii || jj || kk) {
2389                     if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2390                       for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2391                     }
2392                   } else {
2393                     if (dfill) {
2394                       for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2395                     } else {
2396                       for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2397                     }
2398                   }
2399                 }
2400               }
2401             }
2402             row  = l + nc*(slot);
2403             ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2404           }
2405         }
2406       }
2407     }
2408     ierr = PetscFree(values);CHKERRQ(ierr);
2409     /* do not copy values to GPU since they are all zero and not yet needed there */
2410     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
2411     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2412     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2413     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
2414     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2415   }
2416   ierr = PetscFree(cols);CHKERRQ(ierr);
2417   PetscFunctionReturn(0);
2418 }
2419