xref: /petsc/src/dm/impls/da/fdda.c (revision 2fa40bb9206b96114faa7cb222621ec184d31cd2)
1 
2 #include <petsc/private/dmdaimpl.h> /*I      "petscdmda.h"     I*/
3 #include <petscmat.h>
4 
5 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring*);
6 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring*);
7 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring*);
8 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring*);
9 
10 /*
11    For ghost i that may be negative or greater than the upper bound this
12   maps it into the 0:m-1 range using periodicity
13 */
14 #define SetInRange(i,m) ((i < 0) ? m+i : ((i >= m) ? i-m : i))
15 
16 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill,PetscInt w,PetscInt **rfill)
17 {
18   PetscErrorCode ierr;
19   PetscInt       i,j,nz,*fill;
20 
21   PetscFunctionBegin;
22   if (!dfill) PetscFunctionReturn(0);
23 
24   /* count number nonzeros */
25   nz = 0;
26   for (i=0; i<w; i++) {
27     for (j=0; j<w; j++) {
28       if (dfill[w*i+j]) nz++;
29     }
30   }
31   ierr = PetscMalloc1(nz + w + 1,&fill);CHKERRQ(ierr);
32   /* construct modified CSR storage of nonzero structure */
33   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
34    so fill[1] - fill[0] gives number of nonzeros in first row etc */
35   nz = w + 1;
36   for (i=0; i<w; i++) {
37     fill[i] = nz;
38     for (j=0; j<w; j++) {
39       if (dfill[w*i+j]) {
40         fill[nz] = j;
41         nz++;
42       }
43     }
44   }
45   fill[w] = nz;
46 
47   *rfill = fill;
48   PetscFunctionReturn(0);
49 }
50 
51 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse,PetscInt w,PetscInt **rfill)
52 {
53   PetscErrorCode ierr;
54   PetscInt       nz;
55 
56   PetscFunctionBegin;
57   if (!dfillsparse) PetscFunctionReturn(0);
58 
59   /* Determine number of non-zeros */
60   nz = (dfillsparse[w] - w - 1);
61 
62   /* Allocate space for our copy of the given sparse matrix representation. */
63   ierr = PetscMalloc1(nz + w + 1,rfill);CHKERRQ(ierr);
64   ierr = PetscArraycpy(*rfill,dfillsparse,nz+w+1);CHKERRQ(ierr);
65   PetscFunctionReturn(0);
66 }
67 
68 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
69 {
70   PetscErrorCode ierr;
71   PetscInt       i,k,cnt = 1;
72 
73   PetscFunctionBegin;
74 
75   /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of
76    columns to the left with any nonzeros in them plus 1 */
77   ierr = PetscCalloc1(dd->w,&dd->ofillcols);CHKERRQ(ierr);
78   for (i=0; i<dd->w; i++) {
79     for (k=dd->ofill[i]; k<dd->ofill[i+1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
80   }
81   for (i=0; i<dd->w; i++) {
82     if (dd->ofillcols[i]) {
83       dd->ofillcols[i] = cnt++;
84     }
85   }
86   PetscFunctionReturn(0);
87 }
88 
89 /*@
90     DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
91     of the matrix returned by DMCreateMatrix().
92 
93     Logically Collective on da
94 
95     Input Parameters:
96 +   da - the distributed array
97 .   dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
98 -   ofill - the fill pattern in the off-diagonal blocks
99 
100     Level: developer
101 
102     Notes:
103     This only makes sense when you are doing multicomponent problems but using the
104        MPIAIJ matrix format
105 
106            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
107        representing coupling and 0 entries for missing coupling. For example
108 $             dfill[9] = {1, 0, 0,
109 $                         1, 1, 0,
110 $                         0, 1, 1}
111        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
112        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
113        diagonal block).
114 
115      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
116      can be represented in the dfill, ofill format
117 
118    Contributed by Glenn Hammond
119 
120 .seealso DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()
121 
122 @*/
123 PetscErrorCode  DMDASetBlockFills(DM da,const PetscInt *dfill,const PetscInt *ofill)
124 {
125   DM_DA          *dd = (DM_DA*)da->data;
126   PetscErrorCode ierr;
127 
128   PetscFunctionBegin;
129   /* save the given dfill and ofill information */
130   ierr = DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);CHKERRQ(ierr);
131   ierr = DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);CHKERRQ(ierr);
132 
133   /* count nonzeros in ofill columns */
134   ierr = DMDASetBlockFills_Private2(dd);CHKERRQ(ierr);
135 
136   PetscFunctionReturn(0);
137 }
138 
139 /*@
140     DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
141     of the matrix returned by DMCreateMatrix(), using sparse representations
142     of fill patterns.
143 
144     Logically Collective on da
145 
146     Input Parameters:
147 +   da - the distributed array
148 .   dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block)
149 -   ofill - the sparse fill pattern in the off-diagonal blocks
150 
151     Level: developer
152 
153     Notes: This only makes sense when you are doing multicomponent problems but using the
154        MPIAIJ matrix format
155 
156            The format for dfill and ofill is a sparse representation of a
157            dof-by-dof matrix with 1 entries representing coupling and 0 entries
158            for missing coupling.  The sparse representation is a 1 dimensional
159            array of length nz + dof + 1, where nz is the number of non-zeros in
160            the matrix.  The first dof entries in the array give the
161            starting array indices of each row's items in the rest of the array,
162            the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
163            and the remaining nz items give the column indices of each of
164            the 1s within the logical 2D matrix.  Each row's items within
165            the array are the column indices of the 1s within that row
166            of the 2D matrix.  PETSc developers may recognize that this is the
167            same format as that computed by the DMDASetBlockFills_Private()
168            function from a dense 2D matrix representation.
169 
170      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
171      can be represented in the dfill, ofill format
172 
173    Contributed by Philip C. Roth
174 
175 .seealso DMDASetBlockFills(), DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()
176 
177 @*/
178 PetscErrorCode  DMDASetBlockFillsSparse(DM da,const PetscInt *dfillsparse,const PetscInt *ofillsparse)
179 {
180   DM_DA          *dd = (DM_DA*)da->data;
181   PetscErrorCode ierr;
182 
183   PetscFunctionBegin;
184   /* save the given dfill and ofill information */
185   ierr = DMDASetBlockFillsSparse_Private(dfillsparse,dd->w,&dd->dfill);CHKERRQ(ierr);
186   ierr = DMDASetBlockFillsSparse_Private(ofillsparse,dd->w,&dd->ofill);CHKERRQ(ierr);
187 
188   /* count nonzeros in ofill columns */
189   ierr = DMDASetBlockFills_Private2(dd);CHKERRQ(ierr);
190 
191   PetscFunctionReturn(0);
192 }
193 
194 PetscErrorCode  DMCreateColoring_DA(DM da,ISColoringType ctype,ISColoring *coloring)
195 {
196   PetscErrorCode   ierr;
197   PetscInt         dim,m,n,p,nc;
198   DMBoundaryType   bx,by,bz;
199   MPI_Comm         comm;
200   PetscMPIInt      size;
201   PetscBool        isBAIJ;
202   DM_DA            *dd = (DM_DA*)da->data;
203 
204   PetscFunctionBegin;
205   /*
206                                   m
207           ------------------------------------------------------
208          |                                                     |
209          |                                                     |
210          |               ----------------------                |
211          |               |                    |                |
212       n  |           yn  |                    |                |
213          |               |                    |                |
214          |               .---------------------                |
215          |             (xs,ys)     xn                          |
216          |            .                                        |
217          |         (gxs,gys)                                   |
218          |                                                     |
219           -----------------------------------------------------
220   */
221 
222   /*
223          nc - number of components per grid point
224          col - number of colors needed in one direction for single component problem
225 
226   */
227   ierr = DMDAGetInfo(da,&dim,NULL,NULL,NULL,&m,&n,&p,&nc,NULL,&bx,&by,&bz,NULL);CHKERRQ(ierr);
228 
229   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
230   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
231   if (ctype == IS_COLORING_LOCAL) {
232     if (size == 1) {
233       ctype = IS_COLORING_GLOBAL;
234     } else if (dim > 1) {
235       if ((m==1 && bx == DM_BOUNDARY_PERIODIC) || (n==1 && by == DM_BOUNDARY_PERIODIC) || (p==1 && bz == DM_BOUNDARY_PERIODIC)) {
236         SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain  on the same process");
237       }
238     }
239   }
240 
241   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
242      matrices is for the blocks, not the individual matrix elements  */
243   ierr = PetscStrbeginswith(da->mattype,MATBAIJ,&isBAIJ);CHKERRQ(ierr);
244   if (!isBAIJ) {ierr = PetscStrbeginswith(da->mattype,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);}
245   if (!isBAIJ) {ierr = PetscStrbeginswith(da->mattype,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);}
246   if (isBAIJ) {
247     dd->w  = 1;
248     dd->xs = dd->xs/nc;
249     dd->xe = dd->xe/nc;
250     dd->Xs = dd->Xs/nc;
251     dd->Xe = dd->Xe/nc;
252   }
253 
254   /*
255      We do not provide a getcoloring function in the DMDA operations because
256    the basic DMDA does not know about matrices. We think of DMDA as being
257    more low-level then matrices.
258   */
259   if (dim == 1) {
260     ierr = DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
261   } else if (dim == 2) {
262     ierr = DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
263   } else if (dim == 3) {
264     ierr = DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
265   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
266   if (isBAIJ) {
267     dd->w  = nc;
268     dd->xs = dd->xs*nc;
269     dd->xe = dd->xe*nc;
270     dd->Xs = dd->Xs*nc;
271     dd->Xe = dd->Xe*nc;
272   }
273   PetscFunctionReturn(0);
274 }
275 
276 /* ---------------------------------------------------------------------------------*/
277 
278 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
279 {
280   PetscErrorCode  ierr;
281   PetscInt        xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
282   PetscInt        ncolors;
283   MPI_Comm        comm;
284   DMBoundaryType  bx,by;
285   DMDAStencilType st;
286   ISColoringValue *colors;
287   DM_DA           *dd = (DM_DA*)da->data;
288 
289   PetscFunctionBegin;
290   /*
291          nc - number of components per grid point
292          col - number of colors needed in one direction for single component problem
293 
294   */
295   ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,&M,&N,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
296   col  = 2*s + 1;
297   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
298   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
299   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
300 
301   /* special case as taught to us by Paul Hovland */
302   if (st == DMDA_STENCIL_STAR && s == 1) {
303     ierr = DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
304   } else {
305     if (ctype == IS_COLORING_GLOBAL) {
306       if (!dd->localcoloring) {
307         ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr);
308         ii   = 0;
309         for (j=ys; j<ys+ny; j++) {
310           for (i=xs; i<xs+nx; i++) {
311             for (k=0; k<nc; k++) {
312               colors[ii++] = k + nc*((i % col) + col*(j % col));
313             }
314           }
315         }
316         ncolors = nc + nc*(col-1 + col*(col-1));
317         ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
318       }
319       *coloring = dd->localcoloring;
320     } else if (ctype == IS_COLORING_LOCAL) {
321       if (!dd->ghostedcoloring) {
322         ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr);
323         ii   = 0;
324         for (j=gys; j<gys+gny; j++) {
325           for (i=gxs; i<gxs+gnx; i++) {
326             for (k=0; k<nc; k++) {
327               /* the complicated stuff is to handle periodic boundaries */
328               colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
329             }
330           }
331         }
332         ncolors = nc + nc*(col - 1 + col*(col-1));
333         ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
334         /* PetscIntView(ncolors,(PetscInt*)colors,0); */
335 
336         ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
337       }
338       *coloring = dd->ghostedcoloring;
339     } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
340   }
341   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
342   PetscFunctionReturn(0);
343 }
344 
345 /* ---------------------------------------------------------------------------------*/
346 
347 PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
348 {
349   PetscErrorCode  ierr;
350   PetscInt        xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
351   PetscInt        ncolors;
352   MPI_Comm        comm;
353   DMBoundaryType  bx,by,bz;
354   DMDAStencilType st;
355   ISColoringValue *colors;
356   DM_DA           *dd = (DM_DA*)da->data;
357 
358   PetscFunctionBegin;
359   /*
360          nc - number of components per grid point
361          col - number of colors needed in one direction for single component problem
362 
363   */
364   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
365   col  = 2*s + 1;
366   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
367   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
368   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
369 
370   /* create the coloring */
371   if (ctype == IS_COLORING_GLOBAL) {
372     if (!dd->localcoloring) {
373       ierr = PetscMalloc1(nc*nx*ny*nz,&colors);CHKERRQ(ierr);
374       ii   = 0;
375       for (k=zs; k<zs+nz; k++) {
376         for (j=ys; j<ys+ny; j++) {
377           for (i=xs; i<xs+nx; i++) {
378             for (l=0; l<nc; l++) {
379               colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
380             }
381           }
382         }
383       }
384       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
385       ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
386     }
387     *coloring = dd->localcoloring;
388   } else if (ctype == IS_COLORING_LOCAL) {
389     if (!dd->ghostedcoloring) {
390       ierr = PetscMalloc1(nc*gnx*gny*gnz,&colors);CHKERRQ(ierr);
391       ii   = 0;
392       for (k=gzs; k<gzs+gnz; k++) {
393         for (j=gys; j<gys+gny; j++) {
394           for (i=gxs; i<gxs+gnx; i++) {
395             for (l=0; l<nc; l++) {
396               /* the complicated stuff is to handle periodic boundaries */
397               colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
398             }
399           }
400         }
401       }
402       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
403       ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
404       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
405     }
406     *coloring = dd->ghostedcoloring;
407   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
408   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
409   PetscFunctionReturn(0);
410 }
411 
412 /* ---------------------------------------------------------------------------------*/
413 
414 PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
415 {
416   PetscErrorCode  ierr;
417   PetscInt        xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
418   PetscInt        ncolors;
419   MPI_Comm        comm;
420   DMBoundaryType  bx;
421   ISColoringValue *colors;
422   DM_DA           *dd = (DM_DA*)da->data;
423 
424   PetscFunctionBegin;
425   /*
426          nc - number of components per grid point
427          col - number of colors needed in one direction for single component problem
428 
429   */
430   ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,&M,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr);
431   col  = 2*s + 1;
432   ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr);
433   ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr);
434   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
435 
436   /* create the coloring */
437   if (ctype == IS_COLORING_GLOBAL) {
438     if (!dd->localcoloring) {
439       ierr = PetscMalloc1(nc*nx,&colors);CHKERRQ(ierr);
440       if (dd->ofillcols) {
441         PetscInt tc = 0;
442         for (i=0; i<nc; i++) tc += (PetscInt) (dd->ofillcols[i] > 0);
443         i1 = 0;
444         for (i=xs; i<xs+nx; i++) {
445           for (l=0; l<nc; l++) {
446             if (dd->ofillcols[l] && (i % col)) {
447               colors[i1++] =  nc - 1 + tc*((i % col) - 1) + dd->ofillcols[l];
448             } else {
449               colors[i1++] = l;
450             }
451           }
452         }
453         ncolors = nc + 2*s*tc;
454       } else {
455         i1 = 0;
456         for (i=xs; i<xs+nx; i++) {
457           for (l=0; l<nc; l++) {
458             colors[i1++] = l + nc*(i % col);
459           }
460         }
461         ncolors = nc + nc*(col-1);
462       }
463       ierr = ISColoringCreate(comm,ncolors,nc*nx,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
464     }
465     *coloring = dd->localcoloring;
466   } else if (ctype == IS_COLORING_LOCAL) {
467     if (!dd->ghostedcoloring) {
468       ierr = PetscMalloc1(nc*gnx,&colors);CHKERRQ(ierr);
469       i1   = 0;
470       for (i=gxs; i<gxs+gnx; i++) {
471         for (l=0; l<nc; l++) {
472           /* the complicated stuff is to handle periodic boundaries */
473           colors[i1++] = l + nc*(SetInRange(i,m) % col);
474         }
475       }
476       ncolors = nc + nc*(col-1);
477       ierr    = ISColoringCreate(comm,ncolors,nc*gnx,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
478       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
479     }
480     *coloring = dd->ghostedcoloring;
481   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
482   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
483   PetscFunctionReturn(0);
484 }
485 
486 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
487 {
488   PetscErrorCode  ierr;
489   PetscInt        xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
490   PetscInt        ncolors;
491   MPI_Comm        comm;
492   DMBoundaryType  bx,by;
493   ISColoringValue *colors;
494   DM_DA           *dd = (DM_DA*)da->data;
495 
496   PetscFunctionBegin;
497   /*
498          nc - number of components per grid point
499          col - number of colors needed in one direction for single component problem
500 
501   */
502   ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,NULL);CHKERRQ(ierr);
503   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
504   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
505   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
506   /* create the coloring */
507   if (ctype == IS_COLORING_GLOBAL) {
508     if (!dd->localcoloring) {
509       ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr);
510       ii   = 0;
511       for (j=ys; j<ys+ny; j++) {
512         for (i=xs; i<xs+nx; i++) {
513           for (k=0; k<nc; k++) {
514             colors[ii++] = k + nc*((3*j+i) % 5);
515           }
516         }
517       }
518       ncolors = 5*nc;
519       ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
520     }
521     *coloring = dd->localcoloring;
522   } else if (ctype == IS_COLORING_LOCAL) {
523     if (!dd->ghostedcoloring) {
524       ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr);
525       ii = 0;
526       for (j=gys; j<gys+gny; j++) {
527         for (i=gxs; i<gxs+gnx; i++) {
528           for (k=0; k<nc; k++) {
529             colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
530           }
531         }
532       }
533       ncolors = 5*nc;
534       ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
535       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
536     }
537     *coloring = dd->ghostedcoloring;
538   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
539   PetscFunctionReturn(0);
540 }
541 
542 /* =========================================================================== */
543 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat,PetscBool);
544 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM,Mat);
545 extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM,Mat,PetscBool);
546 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat,PetscBool);
547 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat);
548 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat,PetscBool);
549 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat);
550 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat);
551 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat);
552 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat);
553 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat);
554 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM,Mat);
555 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM,Mat);
556 extern PetscErrorCode DMCreateMatrix_DA_IS(DM,Mat);
557 
558 /*@C
559    MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix
560 
561    Logically Collective on mat
562 
563    Input Parameters:
564 +  mat - the matrix
565 -  da - the da
566 
567    Level: intermediate
568 
569 @*/
570 PetscErrorCode MatSetupDM(Mat mat,DM da)
571 {
572   PetscErrorCode ierr;
573 
574   PetscFunctionBegin;
575   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
576   PetscValidHeaderSpecificType(da,DM_CLASSID,2,DMDA);
577   ierr = PetscTryMethod(mat,"MatSetupDM_C",(Mat,DM),(mat,da));CHKERRQ(ierr);
578   PetscFunctionReturn(0);
579 }
580 
581 PetscErrorCode  MatView_MPI_DA(Mat A,PetscViewer viewer)
582 {
583   DM                da;
584   PetscErrorCode    ierr;
585   const char        *prefix;
586   Mat               Anatural;
587   AO                ao;
588   PetscInt          rstart,rend,*petsc,i;
589   IS                is;
590   MPI_Comm          comm;
591   PetscViewerFormat format;
592 
593   PetscFunctionBegin;
594   /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
595   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
596   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0);
597 
598   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
599   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
600   if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
601 
602   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
603   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
604   ierr = PetscMalloc1(rend-rstart,&petsc);CHKERRQ(ierr);
605   for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
606   ierr = AOApplicationToPetsc(ao,rend-rstart,petsc);CHKERRQ(ierr);
607   ierr = ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
608 
609   /* call viewer on natural ordering */
610   ierr = MatCreateSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);CHKERRQ(ierr);
611   ierr = ISDestroy(&is);CHKERRQ(ierr);
612   ierr = PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);CHKERRQ(ierr);
613   ierr = PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);CHKERRQ(ierr);
614   ierr = PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);CHKERRQ(ierr);
615   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
616   ierr = MatView(Anatural,viewer);CHKERRQ(ierr);
617   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
618   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
619   PetscFunctionReturn(0);
620 }
621 
622 PetscErrorCode  MatLoad_MPI_DA(Mat A,PetscViewer viewer)
623 {
624   DM             da;
625   PetscErrorCode ierr;
626   Mat            Anatural,Aapp;
627   AO             ao;
628   PetscInt       rstart,rend,*app,i,m,n,M,N;
629   IS             is;
630   MPI_Comm       comm;
631 
632   PetscFunctionBegin;
633   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
634   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
635   if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
636 
637   /* Load the matrix in natural ordering */
638   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Anatural);CHKERRQ(ierr);
639   ierr = MatSetType(Anatural,((PetscObject)A)->type_name);CHKERRQ(ierr);
640   ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr);
641   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
642   ierr = MatSetSizes(Anatural,m,n,M,N);CHKERRQ(ierr);
643   ierr = MatLoad(Anatural,viewer);CHKERRQ(ierr);
644 
645   /* Map natural ordering to application ordering and create IS */
646   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
647   ierr = MatGetOwnershipRange(Anatural,&rstart,&rend);CHKERRQ(ierr);
648   ierr = PetscMalloc1(rend-rstart,&app);CHKERRQ(ierr);
649   for (i=rstart; i<rend; i++) app[i-rstart] = i;
650   ierr = AOPetscToApplication(ao,rend-rstart,app);CHKERRQ(ierr);
651   ierr = ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
652 
653   /* Do permutation and replace header */
654   ierr = MatCreateSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);CHKERRQ(ierr);
655   ierr = MatHeaderReplace(A,&Aapp);CHKERRQ(ierr);
656   ierr = ISDestroy(&is);CHKERRQ(ierr);
657   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
658   PetscFunctionReturn(0);
659 }
660 
661 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
662 {
663   PetscErrorCode ierr;
664   PetscInt       dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
665   Mat            A;
666   MPI_Comm       comm;
667   MatType        Atype;
668   void           (*aij)(void)=NULL,(*baij)(void)=NULL,(*sbaij)(void)=NULL,(*sell)(void)=NULL,(*is)(void)=NULL;
669   MatType        mtype;
670   PetscMPIInt    size;
671   DM_DA          *dd = (DM_DA*)da->data;
672 
673   PetscFunctionBegin;
674   ierr = MatInitializePackage();CHKERRQ(ierr);
675   mtype = da->mattype;
676 
677   /*
678                                   m
679           ------------------------------------------------------
680          |                                                     |
681          |                                                     |
682          |               ----------------------                |
683          |               |                    |                |
684       n  |           ny  |                    |                |
685          |               |                    |                |
686          |               .---------------------                |
687          |             (xs,ys)     nx                          |
688          |            .                                        |
689          |         (gxs,gys)                                   |
690          |                                                     |
691           -----------------------------------------------------
692   */
693 
694   /*
695          nc - number of components per grid point
696          col - number of colors needed in one direction for single component problem
697 
698   */
699   M   = dd->M;
700   N   = dd->N;
701   P   = dd->P;
702   dim = da->dim;
703   dof = dd->w;
704   /* ierr = DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr); */
705   ierr = DMDAGetCorners(da,NULL,NULL,NULL,&nx,&ny,&nz);CHKERRQ(ierr);
706   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
707   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
708   ierr = MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);CHKERRQ(ierr);
709   ierr = MatSetType(A,mtype);CHKERRQ(ierr);
710   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
711   ierr = MatSetDM(A,da);CHKERRQ(ierr);
712   if (da->structure_only) {
713     ierr = MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE);CHKERRQ(ierr);
714   }
715   ierr = MatGetType(A,&Atype);CHKERRQ(ierr);
716   /*
717      We do not provide a getmatrix function in the DMDA operations because
718    the basic DMDA does not know about matrices. We think of DMDA as being more
719    more low-level than matrices. This is kind of cheating but, cause sometimes
720    we think of DMDA has higher level than matrices.
721 
722      We could switch based on Atype (or mtype), but we do not since the
723    specialized setting routines depend only on the particular preallocation
724    details of the matrix, not the type itself.
725   */
726   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
727   if (!aij) {
728     ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
729   }
730   if (!aij) {
731     ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
732     if (!baij) {
733       ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
734     }
735     if (!baij) {
736       ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
737       if (!sbaij) {
738         ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
739       }
740       if (!sbaij) {
741         ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISELLSetPreallocation_C",&sell);CHKERRQ(ierr);
742         if (!sell) {
743           ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",&sell);CHKERRQ(ierr);
744         }
745       }
746       if (!sell) {
747         ierr = PetscObjectQueryFunction((PetscObject)A,"MatISSetPreallocation_C",&is);CHKERRQ(ierr);
748       }
749     }
750   }
751   if (aij) {
752     if (dim == 1) {
753       if (dd->ofill) {
754         ierr = DMCreateMatrix_DA_1d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
755       } else {
756         DMBoundaryType bx;
757         PetscMPIInt  size;
758         ierr = DMDAGetInfo(da,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&bx,NULL,NULL,NULL);CHKERRQ(ierr);
759         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRMPI(ierr);
760         if (size == 1 && bx == DM_BOUNDARY_NONE) {
761           ierr = DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da,A,PETSC_FALSE);CHKERRQ(ierr);
762         } else {
763           ierr = DMCreateMatrix_DA_1d_MPIAIJ(da,A,PETSC_FALSE);CHKERRQ(ierr);
764         }
765       }
766     } else if (dim == 2) {
767       if (dd->ofill) {
768         ierr = DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
769       } else {
770         ierr = DMCreateMatrix_DA_2d_MPIAIJ(da,A,PETSC_FALSE);CHKERRQ(ierr);
771       }
772     } else if (dim == 3) {
773       if (dd->ofill) {
774         ierr = DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
775       } else {
776         ierr = DMCreateMatrix_DA_3d_MPIAIJ(da,A,PETSC_FALSE);CHKERRQ(ierr);
777       }
778     }
779   } else if (baij) {
780     if (dim == 2) {
781       ierr = DMCreateMatrix_DA_2d_MPIBAIJ(da,A);CHKERRQ(ierr);
782     } else if (dim == 3) {
783       ierr = DMCreateMatrix_DA_3d_MPIBAIJ(da,A);CHKERRQ(ierr);
784     } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
785   } else if (sbaij) {
786     if (dim == 2) {
787       ierr = DMCreateMatrix_DA_2d_MPISBAIJ(da,A);CHKERRQ(ierr);
788     } else if (dim == 3) {
789       ierr = DMCreateMatrix_DA_3d_MPISBAIJ(da,A);CHKERRQ(ierr);
790     } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
791   } else if (sell) {
792      if (dim == 2) {
793        ierr = DMCreateMatrix_DA_2d_MPISELL(da,A);CHKERRQ(ierr);
794      } else if (dim == 3) {
795        ierr = DMCreateMatrix_DA_3d_MPISELL(da,A);CHKERRQ(ierr);
796      } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
797   } else if (is) {
798     ierr = DMCreateMatrix_DA_IS(da,A);CHKERRQ(ierr);
799   } else {
800     ISLocalToGlobalMapping ltog;
801 
802     ierr = MatSetBlockSize(A,dof);CHKERRQ(ierr);
803     ierr = MatSetUp(A);CHKERRQ(ierr);
804     ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
805     ierr = MatSetLocalToGlobalMapping(A,ltog,ltog);CHKERRQ(ierr);
806   }
807   ierr = DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);CHKERRQ(ierr);
808   ierr = MatSetStencil(A,dim,dims,starts,dof);CHKERRQ(ierr);
809   ierr = MatSetDM(A,da);CHKERRQ(ierr);
810   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
811   if (size > 1) {
812     /* change viewer to display matrix in natural ordering */
813     ierr = MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA);CHKERRQ(ierr);
814     ierr = MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA);CHKERRQ(ierr);
815   }
816   *J = A;
817   PetscFunctionReturn(0);
818 }
819 
820 /* ---------------------------------------------------------------------------------*/
821 PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
822 
823 PetscErrorCode DMCreateMatrix_DA_IS(DM dm,Mat J)
824 {
825   DM_DA                  *da = (DM_DA*)dm->data;
826   Mat                    lJ;
827   ISLocalToGlobalMapping ltog;
828   IS                     is_loc_filt, is_glob;
829   const PetscInt         *e_loc,*idx;
830   PetscInt               nel,nen,nv,dof,dim,*gidx,nb;
831   PetscBool              flg;
832   PetscErrorCode         ierr;
833 
834   /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
835      We need to filter the local indices that are represented through the DMDAGetElements decomposition
836      This is because the size of the local matrices in MATIS is the local size of the l2g map */
837   PetscFunctionBegin;
838   dof  = da->w;
839   dim  = dm->dim;
840 
841   ierr = MatSetBlockSize(J,dof);CHKERRQ(ierr);
842 
843   /* get local elements indices in local DMDA numbering */
844   ierr = DMDAGetElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
845   ierr = ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nel*nen,e_loc,PETSC_COPY_VALUES,&is_loc_filt);CHKERRQ(ierr);
846   ierr = DMDARestoreElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr);
847 
848   /* obtain a consistent local ordering for MATIS */
849   ierr = ISSortRemoveDups(is_loc_filt);CHKERRQ(ierr);
850   ierr = ISBlockGetLocalSize(is_loc_filt,&nb);CHKERRQ(ierr);
851   ierr = DMGetLocalToGlobalMapping(dm,&ltog);CHKERRQ(ierr);
852   ierr = ISLocalToGlobalMappingGetSize(ltog,&nv);CHKERRQ(ierr);
853   ierr = PetscMalloc1(PetscMax(nb,nv/dof),&gidx);CHKERRQ(ierr);
854   ierr = ISBlockGetIndices(is_loc_filt,&idx);CHKERRQ(ierr);
855   ierr = ISLocalToGlobalMappingApplyBlock(ltog,nb,idx,gidx);CHKERRQ(ierr);
856   ierr = ISBlockRestoreIndices(is_loc_filt,&idx);CHKERRQ(ierr);
857   ierr = ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nb,gidx,PETSC_USE_POINTER,&is_glob);CHKERRQ(ierr);
858   ierr = ISLocalToGlobalMappingCreateIS(is_glob,&ltog);CHKERRQ(ierr);
859   ierr = ISDestroy(&is_glob);CHKERRQ(ierr);
860   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
861   ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
862 
863   /* We also attach a l2g map to the local matrices to have MatSetValueLocal to work */
864   ierr = MatISGetLocalMat(J,&lJ);CHKERRQ(ierr);
865   ierr = ISLocalToGlobalMappingCreateIS(is_loc_filt,&ltog);CHKERRQ(ierr);
866   ierr = ISDestroy(&is_loc_filt);CHKERRQ(ierr);
867   ierr = ISCreateStride(PetscObjectComm((PetscObject)lJ),nv/dof,0,1,&is_glob);CHKERRQ(ierr);
868   ierr = ISGetIndices(is_glob,&idx);CHKERRQ(ierr);
869   ierr = ISGlobalToLocalMappingApplyBlock(ltog,IS_GTOLM_MASK,nv/dof,idx,&nb,gidx);CHKERRQ(ierr);
870   ierr = ISRestoreIndices(is_glob,&idx);CHKERRQ(ierr);
871   ierr = ISDestroy(&is_glob);CHKERRQ(ierr);
872   ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
873   ierr = ISCreateBlock(PETSC_COMM_SELF,dof,nb,gidx,PETSC_USE_POINTER,&is_loc_filt);CHKERRQ(ierr);
874   ierr = ISLocalToGlobalMappingCreateIS(is_loc_filt,&ltog);CHKERRQ(ierr);
875   ierr = ISDestroy(&is_loc_filt);CHKERRQ(ierr);
876   ierr = MatSetLocalToGlobalMapping(lJ,ltog,ltog);CHKERRQ(ierr);
877   ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
878   ierr = PetscFree(gidx);CHKERRQ(ierr);
879 
880   /* Preallocation (not exact): we reuse the preallocation routines of the assembled version  */
881   flg = dm->prealloc_only;
882   dm->prealloc_only = PETSC_TRUE;
883   switch (dim) {
884   case 1:
885     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr);
886     ierr = DMCreateMatrix_DA_1d_MPIAIJ(dm,J,PETSC_TRUE);CHKERRQ(ierr);
887     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
888     break;
889   case 2:
890     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr);
891     ierr = DMCreateMatrix_DA_2d_MPIAIJ(dm,J,PETSC_TRUE);CHKERRQ(ierr);
892     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
893     break;
894   case 3:
895     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr);
896     ierr = DMCreateMatrix_DA_3d_MPIAIJ(dm,J,PETSC_TRUE);CHKERRQ(ierr);
897     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
898     break;
899   default:
900     SETERRQ1(PetscObjectComm((PetscObject)dm),PETSC_ERR_SUP,"Unhandled dimension %d",dim);
901   }
902   dm->prealloc_only = flg;
903   PetscFunctionReturn(0);
904 }
905 
906 PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da,Mat J)
907 {
908   PetscErrorCode         ierr;
909   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p;
910   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
911   MPI_Comm               comm;
912   PetscScalar            *values;
913   DMBoundaryType         bx,by;
914   ISLocalToGlobalMapping ltog;
915   DMDAStencilType        st;
916 
917   PetscFunctionBegin;
918   /*
919          nc - number of components per grid point
920          col - number of colors needed in one direction for single component problem
921 
922   */
923   ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
924   col  = 2*s + 1;
925   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
926   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
927   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
928 
929   ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr);
930   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
931 
932   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
933   /* determine the matrix preallocation information */
934   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
935   for (i=xs; i<xs+nx; i++) {
936 
937     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
938     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
939 
940     for (j=ys; j<ys+ny; j++) {
941       slot = i - gxs + gnx*(j - gys);
942 
943       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
944       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
945 
946       cnt = 0;
947       for (k=0; k<nc; k++) {
948         for (l=lstart; l<lend+1; l++) {
949           for (p=pstart; p<pend+1; p++) {
950             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
951               cols[cnt++] = k + nc*(slot + gnx*l + p);
952             }
953           }
954         }
955         rows[k] = k + nc*(slot);
956       }
957       ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
958     }
959   }
960   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
961   ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr);
962   ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
963   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
964 
965   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
966 
967   /*
968     For each node in the grid: we get the neighbors in the local (on processor ordering
969     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
970     PETSc ordering.
971   */
972   if (!da->prealloc_only) {
973     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
974     for (i=xs; i<xs+nx; i++) {
975 
976       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
977       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
978 
979       for (j=ys; j<ys+ny; j++) {
980         slot = i - gxs + gnx*(j - gys);
981 
982         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
983         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
984 
985         cnt = 0;
986         for (k=0; k<nc; k++) {
987           for (l=lstart; l<lend+1; l++) {
988             for (p=pstart; p<pend+1; p++) {
989               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
990                 cols[cnt++] = k + nc*(slot + gnx*l + p);
991               }
992             }
993           }
994           rows[k] = k + nc*(slot);
995         }
996         ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
997       }
998     }
999     ierr = PetscFree(values);CHKERRQ(ierr);
1000     /* do not copy values to GPU since they are all zero and not yet needed there */
1001     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1002     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1003     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1004     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1005     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1006   }
1007   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1008   PetscFunctionReturn(0);
1009 }
1010 
1011 PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da,Mat J)
1012 {
1013   PetscErrorCode         ierr;
1014   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1015   PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
1016   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
1017   MPI_Comm               comm;
1018   PetscScalar            *values;
1019   DMBoundaryType         bx,by,bz;
1020   ISLocalToGlobalMapping ltog;
1021   DMDAStencilType        st;
1022 
1023   PetscFunctionBegin;
1024   /*
1025          nc - number of components per grid point
1026          col - number of colors needed in one direction for single component problem
1027 
1028   */
1029   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1030   col  = 2*s + 1;
1031   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1032   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1033   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1034 
1035   ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr);
1036   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1037 
1038   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1039   /* determine the matrix preallocation information */
1040   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1041   for (i=xs; i<xs+nx; i++) {
1042     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1043     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1044     for (j=ys; j<ys+ny; j++) {
1045       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1046       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1047       for (k=zs; k<zs+nz; k++) {
1048         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1049         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1050 
1051         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1052 
1053         cnt = 0;
1054         for (l=0; l<nc; l++) {
1055           for (ii=istart; ii<iend+1; ii++) {
1056             for (jj=jstart; jj<jend+1; jj++) {
1057               for (kk=kstart; kk<kend+1; kk++) {
1058                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1059                   cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1060                 }
1061               }
1062             }
1063           }
1064           rows[l] = l + nc*(slot);
1065         }
1066         ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1067       }
1068     }
1069   }
1070   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1071   ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1072   ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1073   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1074   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1075 
1076   /*
1077     For each node in the grid: we get the neighbors in the local (on processor ordering
1078     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1079     PETSc ordering.
1080   */
1081   if (!da->prealloc_only) {
1082     ierr = PetscCalloc1(col*col*col*nc*nc*nc,&values);CHKERRQ(ierr);
1083     for (i=xs; i<xs+nx; i++) {
1084       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1085       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1086       for (j=ys; j<ys+ny; j++) {
1087         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1088         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1089         for (k=zs; k<zs+nz; k++) {
1090           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1091           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1092 
1093           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1094 
1095           cnt = 0;
1096           for (l=0; l<nc; l++) {
1097             for (ii=istart; ii<iend+1; ii++) {
1098               for (jj=jstart; jj<jend+1; jj++) {
1099                 for (kk=kstart; kk<kend+1; kk++) {
1100                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1101                     cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1102                   }
1103                 }
1104               }
1105             }
1106             rows[l] = l + nc*(slot);
1107           }
1108           ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1109         }
1110       }
1111     }
1112     ierr = PetscFree(values);CHKERRQ(ierr);
1113     /* do not copy values to GPU since they are all zero and not yet needed there */
1114     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1115     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1116     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1117     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1118     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1119   }
1120   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1121   PetscFunctionReturn(0);
1122 }
1123 
1124 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J,PetscBool isIS)
1125 {
1126   PetscErrorCode         ierr;
1127   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,M,N;
1128   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
1129   MPI_Comm               comm;
1130   DMBoundaryType         bx,by;
1131   ISLocalToGlobalMapping ltog,mltog;
1132   DMDAStencilType        st;
1133   PetscBool              removedups = PETSC_FALSE;
1134 
1135   PetscFunctionBegin;
1136   /*
1137          nc - number of components per grid point
1138          col - number of colors needed in one direction for single component problem
1139 
1140   */
1141   ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
1142   if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) {
1143     ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr);
1144   }
1145   col  = 2*s + 1;
1146   /*
1147        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1148        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1149   */
1150   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1151   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1152   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
1153   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
1154   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1155 
1156   ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr);
1157   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1158 
1159   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1160   /* determine the matrix preallocation information */
1161   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
1162   for (i=xs; i<xs+nx; i++) {
1163 
1164     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1165     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1166 
1167     for (j=ys; j<ys+ny; j++) {
1168       slot = i - gxs + gnx*(j - gys);
1169 
1170       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1171       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1172 
1173       cnt = 0;
1174       for (k=0; k<nc; k++) {
1175         for (l=lstart; l<lend+1; l++) {
1176           for (p=pstart; p<pend+1; p++) {
1177             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1178               cols[cnt++] = k + nc*(slot + gnx*l + p);
1179             }
1180           }
1181         }
1182         rows[k] = k + nc*(slot);
1183       }
1184       if (removedups) {
1185         ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1186       } else {
1187         ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1188       }
1189     }
1190   }
1191   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1192   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1193   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1194   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1195   ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
1196   if (!mltog) {
1197     ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1198   }
1199 
1200   /*
1201     For each node in the grid: we get the neighbors in the local (on processor ordering
1202     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1203     PETSc ordering.
1204   */
1205   if (!da->prealloc_only) {
1206     for (i=xs; i<xs+nx; i++) {
1207 
1208       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1209       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1210 
1211       for (j=ys; j<ys+ny; j++) {
1212         slot = i - gxs + gnx*(j - gys);
1213 
1214         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1215         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1216 
1217         cnt = 0;
1218         for (l=lstart; l<lend+1; l++) {
1219           for (p=pstart; p<pend+1; p++) {
1220             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1221               cols[cnt++] = nc*(slot + gnx*l + p);
1222               for (k=1; k<nc; k++) {
1223                 cols[cnt] = 1 + cols[cnt-1];cnt++;
1224               }
1225             }
1226           }
1227         }
1228         for (k=0; k<nc; k++) rows[k] = k + nc*(slot);
1229         ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1230       }
1231     }
1232     /* do not copy values to GPU since they are all zero and not yet needed there */
1233     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1234     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1235     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1236     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1237     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1238     if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) {
1239       ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
1240     }
1241   }
1242   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1243   PetscFunctionReturn(0);
1244 }
1245 
1246 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)
1247 {
1248   PetscErrorCode         ierr;
1249   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1250   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,maxcnt = 0,l,p,M,N;
1251   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
1252   DM_DA                  *dd = (DM_DA*)da->data;
1253   PetscInt               ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
1254   MPI_Comm               comm;
1255   DMBoundaryType         bx,by;
1256   ISLocalToGlobalMapping ltog;
1257   DMDAStencilType        st;
1258   PetscBool              removedups = PETSC_FALSE;
1259 
1260   PetscFunctionBegin;
1261   /*
1262          nc - number of components per grid point
1263          col - number of colors needed in one direction for single component problem
1264 
1265   */
1266   ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
1267   col  = 2*s + 1;
1268   /*
1269        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1270        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1271   */
1272   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1273   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1274   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
1275   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
1276   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1277 
1278   ierr = PetscMalloc1(col*col*nc,&cols);CHKERRQ(ierr);
1279   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1280 
1281   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1282   /* determine the matrix preallocation information */
1283   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
1284   for (i=xs; i<xs+nx; i++) {
1285 
1286     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1287     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1288 
1289     for (j=ys; j<ys+ny; j++) {
1290       slot = i - gxs + gnx*(j - gys);
1291 
1292       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1293       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1294 
1295       for (k=0; k<nc; k++) {
1296         cnt = 0;
1297         for (l=lstart; l<lend+1; l++) {
1298           for (p=pstart; p<pend+1; p++) {
1299             if (l || p) {
1300               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
1301                 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
1302               }
1303             } else {
1304               if (dfill) {
1305                 for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1306               } else {
1307                 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1308               }
1309             }
1310           }
1311         }
1312         row    = k + nc*(slot);
1313         maxcnt = PetscMax(maxcnt,cnt);
1314         if (removedups) {
1315           ierr   = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1316         } else {
1317           ierr   = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1318         }
1319       }
1320     }
1321   }
1322   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1323   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1324   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1325   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1326 
1327   /*
1328     For each node in the grid: we get the neighbors in the local (on processor ordering
1329     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1330     PETSc ordering.
1331   */
1332   if (!da->prealloc_only) {
1333     for (i=xs; i<xs+nx; i++) {
1334 
1335       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1336       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1337 
1338       for (j=ys; j<ys+ny; j++) {
1339         slot = i - gxs + gnx*(j - gys);
1340 
1341         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1342         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1343 
1344         for (k=0; k<nc; k++) {
1345           cnt = 0;
1346           for (l=lstart; l<lend+1; l++) {
1347             for (p=pstart; p<pend+1; p++) {
1348               if (l || p) {
1349                 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
1350                   for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
1351                 }
1352               } else {
1353                 if (dfill) {
1354                   for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1355                 } else {
1356                   for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1357                 }
1358               }
1359             }
1360           }
1361           row  = k + nc*(slot);
1362           ierr = MatSetValuesLocal(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1363         }
1364       }
1365     }
1366     /* do not copy values to GPU since they are all zero and not yet needed there */
1367     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1368     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1369     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1370     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1371     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1372   }
1373   ierr = PetscFree(cols);CHKERRQ(ierr);
1374   PetscFunctionReturn(0);
1375 }
1376 
1377 /* ---------------------------------------------------------------------------------*/
1378 
1379 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J,PetscBool isIS)
1380 {
1381   PetscErrorCode         ierr;
1382   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1383   PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
1384   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
1385   MPI_Comm               comm;
1386   DMBoundaryType         bx,by,bz;
1387   ISLocalToGlobalMapping ltog,mltog;
1388   DMDAStencilType        st;
1389   PetscBool              removedups = PETSC_FALSE;
1390 
1391   PetscFunctionBegin;
1392   /*
1393          nc - number of components per grid point
1394          col - number of colors needed in one direction for single component problem
1395 
1396   */
1397   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1398   if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) {
1399     ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr);
1400   }
1401   col  = 2*s + 1;
1402 
1403   /*
1404        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1405        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1406   */
1407   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1408   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1409   if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;
1410 
1411   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1412   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1413   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1414 
1415   ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr);
1416   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1417 
1418   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1419   /* determine the matrix preallocation information */
1420   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1421   for (i=xs; i<xs+nx; i++) {
1422     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1423     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1424     for (j=ys; j<ys+ny; j++) {
1425       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1426       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1427       for (k=zs; k<zs+nz; k++) {
1428         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1429         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1430 
1431         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1432 
1433         cnt = 0;
1434         for (l=0; l<nc; l++) {
1435           for (ii=istart; ii<iend+1; ii++) {
1436             for (jj=jstart; jj<jend+1; jj++) {
1437               for (kk=kstart; kk<kend+1; kk++) {
1438                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1439                   cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1440                 }
1441               }
1442             }
1443           }
1444           rows[l] = l + nc*(slot);
1445         }
1446         if (removedups) {
1447           ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1448         } else {
1449           ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1450         }
1451       }
1452     }
1453   }
1454   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1455   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1456   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1457   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1458   ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
1459   if (!mltog) {
1460     ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1461   }
1462 
1463   /*
1464     For each node in the grid: we get the neighbors in the local (on processor ordering
1465     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1466     PETSc ordering.
1467   */
1468   if (!da->prealloc_only) {
1469     for (i=xs; i<xs+nx; i++) {
1470       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1471       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1472       for (j=ys; j<ys+ny; j++) {
1473         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1474         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1475         for (k=zs; k<zs+nz; k++) {
1476           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1477           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1478 
1479           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1480 
1481           cnt = 0;
1482           for (kk=kstart; kk<kend+1; kk++) {
1483             for (jj=jstart; jj<jend+1; jj++) {
1484               for (ii=istart; ii<iend+1; ii++) {
1485                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1486                   cols[cnt++] = nc*(slot + ii + gnx*jj + gnx*gny*kk);
1487                     for (l=1; l<nc; l++) {
1488                       cols[cnt] = 1 + cols[cnt-1];cnt++;
1489                   }
1490                 }
1491               }
1492             }
1493           }
1494           rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
1495           ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1496         }
1497       }
1498     }
1499     /* do not copy values to GPU since they are all zero and not yet needed there */
1500     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1501     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1502     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1503     if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) {
1504       ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
1505     }
1506     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1507     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1508   }
1509   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1510   PetscFunctionReturn(0);
1511 }
1512 
1513 /* ---------------------------------------------------------------------------------*/
1514 
1515 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J)
1516 {
1517   PetscErrorCode         ierr;
1518   DM_DA                  *dd = (DM_DA*)da->data;
1519   PetscInt               xs,nx,i,j,gxs,gnx,row,k,l;
1520   PetscInt               m,dim,s,*cols = NULL,nc,cnt,maxcnt = 0,*ocols;
1521   PetscInt               *ofill = dd->ofill,*dfill = dd->dfill;
1522   DMBoundaryType         bx;
1523   ISLocalToGlobalMapping ltog;
1524   PetscMPIInt            rank,size;
1525 
1526   PetscFunctionBegin;
1527   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)da),&rank);CHKERRMPI(ierr);
1528   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRMPI(ierr);
1529 
1530   /*
1531          nc - number of components per grid point
1532 
1533   */
1534   ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr);
1535   if (s > 1) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Matrix creation for 1d not implemented correctly for stencil width larger than 1");
1536   ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr);
1537   ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr);
1538 
1539   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1540   ierr = PetscCalloc2(nx*nc,&cols,nx*nc,&ocols);CHKERRQ(ierr);
1541 
1542   /*
1543         note should be smaller for first and last process with no periodic
1544         does not handle dfill
1545   */
1546   cnt = 0;
1547   /* coupling with process to the left */
1548   for (i=0; i<s; i++) {
1549     for (j=0; j<nc; j++) {
1550       ocols[cnt] = ((rank == 0) ? 0 : (s - i)*(ofill[j+1] - ofill[j]));
1551       cols[cnt]  = dfill[j+1] - dfill[j] + (s + i)*(ofill[j+1] - ofill[j]);
1552       if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1553         if (size > 1) ocols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
1554         else cols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
1555       }
1556       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1557       cnt++;
1558     }
1559   }
1560   for (i=s; i<nx-s; i++) {
1561     for (j=0; j<nc; j++) {
1562       cols[cnt] = dfill[j+1] - dfill[j] + 2*s*(ofill[j+1] - ofill[j]);
1563       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1564       cnt++;
1565     }
1566   }
1567   /* coupling with process to the right */
1568   for (i=nx-s; i<nx; i++) {
1569     for (j=0; j<nc; j++) {
1570       ocols[cnt] = ((rank == (size-1)) ? 0 : (i - nx + s + 1)*(ofill[j+1] - ofill[j]));
1571       cols[cnt]  = dfill[j+1] - dfill[j] + (s + nx - i - 1)*(ofill[j+1] - ofill[j]);
1572       if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1573         if (size > 1) ocols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
1574         else cols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
1575       }
1576       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1577       cnt++;
1578     }
1579   }
1580 
1581   ierr = MatSeqAIJSetPreallocation(J,0,cols);CHKERRQ(ierr);
1582   ierr = MatMPIAIJSetPreallocation(J,0,cols,0,ocols);CHKERRQ(ierr);
1583   ierr = PetscFree2(cols,ocols);CHKERRQ(ierr);
1584 
1585   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1586   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1587 
1588   /*
1589     For each node in the grid: we get the neighbors in the local (on processor ordering
1590     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1591     PETSc ordering.
1592   */
1593   if (!da->prealloc_only) {
1594     ierr = PetscMalloc1(maxcnt,&cols);CHKERRQ(ierr);
1595     row = xs*nc;
1596     /* coupling with process to the left */
1597     for (i=xs; i<xs+s; i++) {
1598       for (j=0; j<nc; j++) {
1599         cnt = 0;
1600         if (rank) {
1601           for (l=0; l<s; l++) {
1602             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1603           }
1604         }
1605         if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1606           for (l=0; l<s; l++) {
1607             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (m + i - s - l)*nc + ofill[k];
1608           }
1609         }
1610         if (dfill) {
1611           for (k=dfill[j]; k<dfill[j+1]; k++) {
1612             cols[cnt++] = i*nc + dfill[k];
1613           }
1614         } else {
1615           for (k=0; k<nc; k++) {
1616             cols[cnt++] = i*nc + k;
1617           }
1618         }
1619         for (l=0; l<s; l++) {
1620           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1621         }
1622         ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1623         row++;
1624       }
1625     }
1626     for (i=xs+s; i<xs+nx-s; i++) {
1627       for (j=0; j<nc; j++) {
1628         cnt = 0;
1629         for (l=0; l<s; l++) {
1630           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1631         }
1632         if (dfill) {
1633           for (k=dfill[j]; k<dfill[j+1]; k++) {
1634             cols[cnt++] = i*nc + dfill[k];
1635           }
1636         } else {
1637           for (k=0; k<nc; k++) {
1638             cols[cnt++] = i*nc + k;
1639           }
1640         }
1641         for (l=0; l<s; l++) {
1642           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1643         }
1644         ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1645         row++;
1646       }
1647     }
1648     /* coupling with process to the right */
1649     for (i=xs+nx-s; i<xs+nx; i++) {
1650       for (j=0; j<nc; j++) {
1651         cnt = 0;
1652         for (l=0; l<s; l++) {
1653           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1654         }
1655         if (dfill) {
1656           for (k=dfill[j]; k<dfill[j+1]; k++) {
1657             cols[cnt++] = i*nc + dfill[k];
1658           }
1659         } else {
1660           for (k=0; k<nc; k++) {
1661             cols[cnt++] = i*nc + k;
1662           }
1663         }
1664         if (rank < size-1) {
1665           for (l=0; l<s; l++) {
1666             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1667           }
1668         }
1669         if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1670           for (l=0; l<s; l++) {
1671             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s - l - m + 2)*nc + ofill[k];
1672           }
1673         }
1674         ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1675         row++;
1676       }
1677     }
1678     ierr = PetscFree(cols);CHKERRQ(ierr);
1679     /* do not copy values to GPU since they are all zero and not yet needed there */
1680     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1681     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1682     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1683     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1684     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1685   }
1686   PetscFunctionReturn(0);
1687 }
1688 
1689 /* ---------------------------------------------------------------------------------*/
1690 
1691 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J,PetscBool isIS)
1692 {
1693   PetscErrorCode         ierr;
1694   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1695   PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
1696   PetscInt               istart,iend;
1697   DMBoundaryType         bx;
1698   ISLocalToGlobalMapping ltog,mltog;
1699 
1700   PetscFunctionBegin;
1701   /*
1702          nc - number of components per grid point
1703          col - number of colors needed in one direction for single component problem
1704 
1705   */
1706   ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr);
1707   if (!isIS && bx == DM_BOUNDARY_NONE) {
1708     ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr);
1709   }
1710   col  = 2*s + 1;
1711 
1712   ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr);
1713   ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr);
1714 
1715   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1716   ierr = MatSeqAIJSetPreallocation(J,col*nc,NULL);CHKERRQ(ierr);
1717   ierr = MatMPIAIJSetPreallocation(J,col*nc,NULL,col*nc,NULL);CHKERRQ(ierr);
1718 
1719   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1720   ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
1721   if (!mltog) {
1722     ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1723   }
1724 
1725   /*
1726     For each node in the grid: we get the neighbors in the local (on processor ordering
1727     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1728     PETSc ordering.
1729   */
1730   if (!da->prealloc_only) {
1731     ierr = PetscMalloc2(nc,&rows,col*nc*nc,&cols);CHKERRQ(ierr);
1732     for (i=xs; i<xs+nx; i++) {
1733       istart = PetscMax(-s,gxs - i);
1734       iend   = PetscMin(s,gxs + gnx - i - 1);
1735       slot   = i - gxs;
1736 
1737       cnt = 0;
1738       for (i1=istart; i1<iend+1; i1++) {
1739         cols[cnt++] = nc*(slot + i1);
1740         for (l=1; l<nc; l++) {
1741           cols[cnt] = 1 + cols[cnt-1];cnt++;
1742         }
1743       }
1744       rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
1745       ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1746     }
1747     /* do not copy values to GPU since they are all zero and not yet needed there */
1748     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1749     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1750     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1751     if (!isIS && bx == DM_BOUNDARY_NONE) {
1752       ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
1753     }
1754     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1755     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1756     ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1757   }
1758   PetscFunctionReturn(0);
1759 }
1760 
1761 /* ---------------------------------------------------------------------------------*/
1762 
1763 PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da,Mat J,PetscBool isIS)
1764 {
1765   PetscErrorCode         ierr;
1766   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1767   PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
1768   PetscInt               istart,iend;
1769   DMBoundaryType         bx;
1770   ISLocalToGlobalMapping ltog,mltog;
1771 
1772   PetscFunctionBegin;
1773   /*
1774          nc - number of components per grid point
1775          col - number of colors needed in one direction for single component problem
1776   */
1777   ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr);
1778   col  = 2*s + 1;
1779 
1780   ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr);
1781   ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr);
1782 
1783   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1784   ierr = MatSeqAIJSetTotalPreallocation(J,nx*nc*col*nc);CHKERRQ(ierr);
1785 
1786   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1787   ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
1788   if (!mltog) {
1789     ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1790   }
1791 
1792   /*
1793     For each node in the grid: we get the neighbors in the local (on processor ordering
1794     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1795     PETSc ordering.
1796   */
1797   if (!da->prealloc_only) {
1798     ierr = PetscMalloc2(nc,&rows,col*nc*nc,&cols);CHKERRQ(ierr);
1799     for (i=xs; i<xs+nx; i++) {
1800       istart = PetscMax(-s,gxs - i);
1801       iend   = PetscMin(s,gxs + gnx - i - 1);
1802       slot   = i - gxs;
1803 
1804       cnt = 0;
1805       for (i1=istart; i1<iend+1; i1++) {
1806         cols[cnt++] = nc*(slot + i1);
1807         for (l=1; l<nc; l++) {
1808           cols[cnt] = 1 + cols[cnt-1];cnt++;
1809         }
1810       }
1811       rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
1812       ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1813     }
1814     /* do not copy values to GPU since they are all zero and not yet needed there */
1815     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1816     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1817     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1818     if (!isIS && bx == DM_BOUNDARY_NONE) {
1819       ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
1820     }
1821     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1822     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1823     ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1824   }
1825   ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
1826   PetscFunctionReturn(0);
1827 }
1828 
1829 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
1830 {
1831   PetscErrorCode         ierr;
1832   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1833   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1834   PetscInt               istart,iend,jstart,jend,ii,jj;
1835   MPI_Comm               comm;
1836   PetscScalar            *values;
1837   DMBoundaryType         bx,by;
1838   DMDAStencilType        st;
1839   ISLocalToGlobalMapping ltog;
1840 
1841   PetscFunctionBegin;
1842   /*
1843      nc - number of components per grid point
1844      col - number of colors needed in one direction for single component problem
1845   */
1846   ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
1847   col  = 2*s + 1;
1848 
1849   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
1850   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
1851   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1852 
1853   ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr);
1854 
1855   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1856 
1857   /* determine the matrix preallocation information */
1858   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1859   for (i=xs; i<xs+nx; i++) {
1860     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1861     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1862     for (j=ys; j<ys+ny; j++) {
1863       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1864       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1865       slot   = i - gxs + gnx*(j - gys);
1866 
1867       /* Find block columns in block row */
1868       cnt = 0;
1869       for (ii=istart; ii<iend+1; ii++) {
1870         for (jj=jstart; jj<jend+1; jj++) {
1871           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1872             cols[cnt++] = slot + ii + gnx*jj;
1873           }
1874         }
1875       }
1876       ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1877     }
1878   }
1879   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1880   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1881   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1882 
1883   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1884 
1885   /*
1886     For each node in the grid: we get the neighbors in the local (on processor ordering
1887     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1888     PETSc ordering.
1889   */
1890   if (!da->prealloc_only) {
1891     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
1892     for (i=xs; i<xs+nx; i++) {
1893       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1894       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1895       for (j=ys; j<ys+ny; j++) {
1896         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1897         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1898         slot = i - gxs + gnx*(j - gys);
1899         cnt  = 0;
1900         for (ii=istart; ii<iend+1; ii++) {
1901           for (jj=jstart; jj<jend+1; jj++) {
1902             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1903               cols[cnt++] = slot + ii + gnx*jj;
1904             }
1905           }
1906         }
1907         ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1908       }
1909     }
1910     ierr = PetscFree(values);CHKERRQ(ierr);
1911     /* do not copy values to GPU since they are all zero and not yet needed there */
1912     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1913     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1914     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1915     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1916     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1917   }
1918   ierr = PetscFree(cols);CHKERRQ(ierr);
1919   PetscFunctionReturn(0);
1920 }
1921 
1922 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
1923 {
1924   PetscErrorCode         ierr;
1925   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1926   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1927   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1928   MPI_Comm               comm;
1929   PetscScalar            *values;
1930   DMBoundaryType         bx,by,bz;
1931   DMDAStencilType        st;
1932   ISLocalToGlobalMapping ltog;
1933 
1934   PetscFunctionBegin;
1935   /*
1936          nc - number of components per grid point
1937          col - number of colors needed in one direction for single component problem
1938 
1939   */
1940   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1941   col  = 2*s + 1;
1942 
1943   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1944   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1945   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1946 
1947   ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr);
1948 
1949   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1950 
1951   /* determine the matrix preallocation information */
1952   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1953   for (i=xs; i<xs+nx; i++) {
1954     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1955     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1956     for (j=ys; j<ys+ny; j++) {
1957       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1958       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1959       for (k=zs; k<zs+nz; k++) {
1960         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1961         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1962 
1963         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1964 
1965         /* Find block columns in block row */
1966         cnt = 0;
1967         for (ii=istart; ii<iend+1; ii++) {
1968           for (jj=jstart; jj<jend+1; jj++) {
1969             for (kk=kstart; kk<kend+1; kk++) {
1970               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1971                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1972               }
1973             }
1974           }
1975         }
1976         ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1977       }
1978     }
1979   }
1980   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1981   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1982   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1983 
1984   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1985 
1986   /*
1987     For each node in the grid: we get the neighbors in the local (on processor ordering
1988     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1989     PETSc ordering.
1990   */
1991   if (!da->prealloc_only) {
1992     ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr);
1993     for (i=xs; i<xs+nx; i++) {
1994       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1995       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1996       for (j=ys; j<ys+ny; j++) {
1997         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1998         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1999         for (k=zs; k<zs+nz; k++) {
2000           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2001           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2002 
2003           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2004 
2005           cnt = 0;
2006           for (ii=istart; ii<iend+1; ii++) {
2007             for (jj=jstart; jj<jend+1; jj++) {
2008               for (kk=kstart; kk<kend+1; kk++) {
2009                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2010                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
2011                 }
2012               }
2013             }
2014           }
2015           ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2016         }
2017       }
2018     }
2019     ierr = PetscFree(values);CHKERRQ(ierr);
2020     /* do not copy values to GPU since they are all zero and not yet needed there */
2021     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
2022     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2023     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2024     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
2025     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2026   }
2027   ierr = PetscFree(cols);CHKERRQ(ierr);
2028   PetscFunctionReturn(0);
2029 }
2030 
2031 /*
2032   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
2033   identify in the local ordering with periodic domain.
2034 */
2035 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[])
2036 {
2037   PetscErrorCode ierr;
2038   PetscInt       i,n;
2039 
2040   PetscFunctionBegin;
2041   ierr = ISLocalToGlobalMappingApplyBlock(ltog,1,row,row);CHKERRQ(ierr);
2042   ierr = ISLocalToGlobalMappingApplyBlock(ltog,*cnt,col,col);CHKERRQ(ierr);
2043   for (i=0,n=0; i<*cnt; i++) {
2044     if (col[i] >= *row) col[n++] = col[i];
2045   }
2046   *cnt = n;
2047   PetscFunctionReturn(0);
2048 }
2049 
2050 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J)
2051 {
2052   PetscErrorCode         ierr;
2053   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2054   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
2055   PetscInt               istart,iend,jstart,jend,ii,jj;
2056   MPI_Comm               comm;
2057   PetscScalar            *values;
2058   DMBoundaryType         bx,by;
2059   DMDAStencilType        st;
2060   ISLocalToGlobalMapping ltog;
2061 
2062   PetscFunctionBegin;
2063   /*
2064      nc - number of components per grid point
2065      col - number of colors needed in one direction for single component problem
2066   */
2067   ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
2068   col  = 2*s + 1;
2069 
2070   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
2071   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
2072   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
2073 
2074   ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr);
2075 
2076   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
2077 
2078   /* determine the matrix preallocation information */
2079   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
2080   for (i=xs; i<xs+nx; i++) {
2081     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2082     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2083     for (j=ys; j<ys+ny; j++) {
2084       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2085       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2086       slot   = i - gxs + gnx*(j - gys);
2087 
2088       /* Find block columns in block row */
2089       cnt = 0;
2090       for (ii=istart; ii<iend+1; ii++) {
2091         for (jj=jstart; jj<jend+1; jj++) {
2092           if (st == DMDA_STENCIL_BOX || !ii || !jj) {
2093             cols[cnt++] = slot + ii + gnx*jj;
2094           }
2095         }
2096       }
2097       ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2098       ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
2099     }
2100   }
2101   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
2102   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
2103   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2104 
2105   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
2106 
2107   /*
2108     For each node in the grid: we get the neighbors in the local (on processor ordering
2109     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2110     PETSc ordering.
2111   */
2112   if (!da->prealloc_only) {
2113     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
2114     for (i=xs; i<xs+nx; i++) {
2115       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2116       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2117       for (j=ys; j<ys+ny; j++) {
2118         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2119         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2120         slot   = i - gxs + gnx*(j - gys);
2121 
2122         /* Find block columns in block row */
2123         cnt = 0;
2124         for (ii=istart; ii<iend+1; ii++) {
2125           for (jj=jstart; jj<jend+1; jj++) {
2126             if (st == DMDA_STENCIL_BOX || !ii || !jj) {
2127               cols[cnt++] = slot + ii + gnx*jj;
2128             }
2129           }
2130         }
2131         ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2132         ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2133       }
2134     }
2135     ierr = PetscFree(values);CHKERRQ(ierr);
2136     /* do not copy values to GPU since they are all zero and not yet needed there */
2137     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
2138     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2139     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2140     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
2141     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2142   }
2143   ierr = PetscFree(cols);CHKERRQ(ierr);
2144   PetscFunctionReturn(0);
2145 }
2146 
2147 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J)
2148 {
2149   PetscErrorCode         ierr;
2150   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2151   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
2152   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
2153   MPI_Comm               comm;
2154   PetscScalar            *values;
2155   DMBoundaryType         bx,by,bz;
2156   DMDAStencilType        st;
2157   ISLocalToGlobalMapping ltog;
2158 
2159   PetscFunctionBegin;
2160   /*
2161      nc - number of components per grid point
2162      col - number of colors needed in one direction for single component problem
2163   */
2164   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
2165   col  = 2*s + 1;
2166 
2167   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
2168   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
2169   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
2170 
2171   /* create the matrix */
2172   ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr);
2173 
2174   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
2175 
2176   /* determine the matrix preallocation information */
2177   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
2178   for (i=xs; i<xs+nx; i++) {
2179     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2180     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2181     for (j=ys; j<ys+ny; j++) {
2182       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2183       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2184       for (k=zs; k<zs+nz; k++) {
2185         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2186         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2187 
2188         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2189 
2190         /* Find block columns in block row */
2191         cnt = 0;
2192         for (ii=istart; ii<iend+1; ii++) {
2193           for (jj=jstart; jj<jend+1; jj++) {
2194             for (kk=kstart; kk<kend+1; kk++) {
2195               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
2196                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
2197               }
2198             }
2199           }
2200         }
2201         ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2202         ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
2203       }
2204     }
2205   }
2206   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
2207   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
2208   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2209 
2210   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
2211 
2212   /*
2213     For each node in the grid: we get the neighbors in the local (on processor ordering
2214     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2215     PETSc ordering.
2216   */
2217   if (!da->prealloc_only) {
2218     ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr);
2219     for (i=xs; i<xs+nx; i++) {
2220       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2221       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2222       for (j=ys; j<ys+ny; j++) {
2223         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2224         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2225         for (k=zs; k<zs+nz; k++) {
2226           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2227           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2228 
2229           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2230 
2231           cnt = 0;
2232           for (ii=istart; ii<iend+1; ii++) {
2233             for (jj=jstart; jj<jend+1; jj++) {
2234               for (kk=kstart; kk<kend+1; kk++) {
2235                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
2236                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
2237                 }
2238               }
2239             }
2240           }
2241           ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2242           ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2243         }
2244       }
2245     }
2246     ierr = PetscFree(values);CHKERRQ(ierr);
2247     /* do not copy values to GPU since they are all zero and not yet needed there */
2248     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
2249     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2250     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2251     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
2252     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2253   }
2254   ierr = PetscFree(cols);CHKERRQ(ierr);
2255   PetscFunctionReturn(0);
2256 }
2257 
2258 /* ---------------------------------------------------------------------------------*/
2259 
2260 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J)
2261 {
2262   PetscErrorCode         ierr;
2263   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2264   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt, maxcnt = 0,l,p,*dnz,*onz;
2265   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
2266   DM_DA                  *dd = (DM_DA*)da->data;
2267   PetscInt               ifill_col,*dfill = dd->dfill,*ofill = dd->ofill;
2268   MPI_Comm               comm;
2269   PetscScalar            *values;
2270   DMBoundaryType         bx,by,bz;
2271   ISLocalToGlobalMapping ltog;
2272   DMDAStencilType        st;
2273   PetscBool              removedups = PETSC_FALSE;
2274 
2275   PetscFunctionBegin;
2276   /*
2277          nc - number of components per grid point
2278          col - number of colors needed in one direction for single component problem
2279 
2280   */
2281   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
2282   col  = 2*s + 1;
2283   if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
2284                  by 2*stencil_width + 1\n");
2285   if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
2286                  by 2*stencil_width + 1\n");
2287   if (bz == DM_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
2288                  by 2*stencil_width + 1\n");
2289 
2290   /*
2291        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
2292        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
2293   */
2294   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
2295   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
2296   if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;
2297 
2298   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
2299   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
2300   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
2301 
2302   ierr = PetscMalloc1(col*col*col*nc,&cols);CHKERRQ(ierr);
2303   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
2304 
2305   /* determine the matrix preallocation information */
2306   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
2307 
2308   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
2309   for (i=xs; i<xs+nx; i++) {
2310     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2311     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2312     for (j=ys; j<ys+ny; j++) {
2313       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2314       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2315       for (k=zs; k<zs+nz; k++) {
2316         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2317         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2318 
2319         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2320 
2321         for (l=0; l<nc; l++) {
2322           cnt = 0;
2323           for (ii=istart; ii<iend+1; ii++) {
2324             for (jj=jstart; jj<jend+1; jj++) {
2325               for (kk=kstart; kk<kend+1; kk++) {
2326                 if (ii || jj || kk) {
2327                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2328                     for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2329                   }
2330                 } else {
2331                   if (dfill) {
2332                     for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2333                   } else {
2334                     for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2335                   }
2336                 }
2337               }
2338             }
2339           }
2340           row  = l + nc*(slot);
2341           maxcnt = PetscMax(maxcnt,cnt);
2342           if (removedups) {
2343             ierr = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
2344           } else {
2345             ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
2346           }
2347         }
2348       }
2349     }
2350   }
2351   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
2352   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
2353   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2354   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
2355 
2356   /*
2357     For each node in the grid: we get the neighbors in the local (on processor ordering
2358     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2359     PETSc ordering.
2360   */
2361   if (!da->prealloc_only) {
2362     ierr = PetscCalloc1(maxcnt,&values);CHKERRQ(ierr);
2363     for (i=xs; i<xs+nx; i++) {
2364       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2365       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2366       for (j=ys; j<ys+ny; j++) {
2367         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2368         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2369         for (k=zs; k<zs+nz; k++) {
2370           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2371           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2372 
2373           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2374 
2375           for (l=0; l<nc; l++) {
2376             cnt = 0;
2377             for (ii=istart; ii<iend+1; ii++) {
2378               for (jj=jstart; jj<jend+1; jj++) {
2379                 for (kk=kstart; kk<kend+1; kk++) {
2380                   if (ii || jj || kk) {
2381                     if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2382                       for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2383                     }
2384                   } else {
2385                     if (dfill) {
2386                       for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2387                     } else {
2388                       for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2389                     }
2390                   }
2391                 }
2392               }
2393             }
2394             row  = l + nc*(slot);
2395             ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2396           }
2397         }
2398       }
2399     }
2400     ierr = PetscFree(values);CHKERRQ(ierr);
2401     /* do not copy values to GPU since they are all zero and not yet needed there */
2402     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
2403     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2404     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2405     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
2406     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2407   }
2408   ierr = PetscFree(cols);CHKERRQ(ierr);
2409   PetscFunctionReturn(0);
2410 }
2411