xref: /petsc/src/dm/impls/da/fdda.c (revision d6874835c0a96c8bf5f402825f77e8369e8fe377)
1 
2 #include <petsc/private/dmdaimpl.h> /*I      "petscdmda.h"     I*/
3 #include <petscmat.h>
4 
5 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring*);
6 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring*);
7 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring*);
8 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring*);
9 
10 /*
11    For ghost i that may be negative or greater than the upper bound this
12   maps it into the 0:m-1 range using periodicity
13 */
14 #define SetInRange(i,m) ((i < 0) ? m+i : ((i >= m) ? i-m : i))
15 
16 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill,PetscInt w,PetscInt **rfill)
17 {
18   PetscErrorCode ierr;
19   PetscInt       i,j,nz,*fill;
20 
21   PetscFunctionBegin;
22   if (!dfill) PetscFunctionReturn(0);
23 
24   /* count number nonzeros */
25   nz = 0;
26   for (i=0; i<w; i++) {
27     for (j=0; j<w; j++) {
28       if (dfill[w*i+j]) nz++;
29     }
30   }
31   ierr = PetscMalloc1(nz + w + 1,&fill);CHKERRQ(ierr);
32   /* construct modified CSR storage of nonzero structure */
33   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
34    so fill[1] - fill[0] gives number of nonzeros in first row etc */
35   nz = w + 1;
36   for (i=0; i<w; i++) {
37     fill[i] = nz;
38     for (j=0; j<w; j++) {
39       if (dfill[w*i+j]) {
40         fill[nz] = j;
41         nz++;
42       }
43     }
44   }
45   fill[w] = nz;
46 
47   *rfill = fill;
48   PetscFunctionReturn(0);
49 }
50 
51 
52 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse,PetscInt w,PetscInt **rfill)
53 {
54   PetscErrorCode ierr;
55   PetscInt       nz;
56 
57   PetscFunctionBegin;
58   if (!dfillsparse) PetscFunctionReturn(0);
59 
60   /* Determine number of non-zeros */
61   nz = (dfillsparse[w] - w - 1);
62 
63   /* Allocate space for our copy of the given sparse matrix representation. */
64   ierr = PetscMalloc1(nz + w + 1,rfill);CHKERRQ(ierr);
65   ierr = PetscArraycpy(*rfill,dfillsparse,nz+w+1);CHKERRQ(ierr);
66   PetscFunctionReturn(0);
67 }
68 
69 
70 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
71 {
72   PetscErrorCode ierr;
73   PetscInt       i,k,cnt = 1;
74 
75   PetscFunctionBegin;
76 
77   /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of
78    columns to the left with any nonzeros in them plus 1 */
79   ierr = PetscCalloc1(dd->w,&dd->ofillcols);CHKERRQ(ierr);
80   for (i=0; i<dd->w; i++) {
81     for (k=dd->ofill[i]; k<dd->ofill[i+1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
82   }
83   for (i=0; i<dd->w; i++) {
84     if (dd->ofillcols[i]) {
85       dd->ofillcols[i] = cnt++;
86     }
87   }
88   PetscFunctionReturn(0);
89 }
90 
91 
92 
93 /*@
94     DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
95     of the matrix returned by DMCreateMatrix().
96 
97     Logically Collective on da
98 
99     Input Parameter:
100 +   da - the distributed array
101 .   dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
102 -   ofill - the fill pattern in the off-diagonal blocks
103 
104 
105     Level: developer
106 
107     Notes:
108     This only makes sense when you are doing multicomponent problems but using the
109        MPIAIJ matrix format
110 
111            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
112        representing coupling and 0 entries for missing coupling. For example
113 $             dfill[9] = {1, 0, 0,
114 $                         1, 1, 0,
115 $                         0, 1, 1}
116        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
117        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
118        diagonal block).
119 
120      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
121      can be represented in the dfill, ofill format
122 
123    Contributed by Glenn Hammond
124 
125 .seealso DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()
126 
127 @*/
128 PetscErrorCode  DMDASetBlockFills(DM da,const PetscInt *dfill,const PetscInt *ofill)
129 {
130   DM_DA          *dd = (DM_DA*)da->data;
131   PetscErrorCode ierr;
132 
133   PetscFunctionBegin;
134   /* save the given dfill and ofill information */
135   ierr = DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);CHKERRQ(ierr);
136   ierr = DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);CHKERRQ(ierr);
137 
138   /* count nonzeros in ofill columns */
139   ierr = DMDASetBlockFills_Private2(dd);CHKERRQ(ierr);
140 
141   PetscFunctionReturn(0);
142 }
143 
144 
145 /*@
146     DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
147     of the matrix returned by DMCreateMatrix(), using sparse representations
148     of fill patterns.
149 
150     Logically Collective on da
151 
152     Input Parameter:
153 +   da - the distributed array
154 .   dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block)
155 -   ofill - the sparse fill pattern in the off-diagonal blocks
156 
157 
158     Level: developer
159 
160     Notes: This only makes sense when you are doing multicomponent problems but using the
161        MPIAIJ matrix format
162 
163            The format for dfill and ofill is a sparse representation of a
164            dof-by-dof matrix with 1 entries representing coupling and 0 entries
165            for missing coupling.  The sparse representation is a 1 dimensional
166            array of length nz + dof + 1, where nz is the number of non-zeros in
167            the matrix.  The first dof entries in the array give the
168            starting array indices of each row's items in the rest of the array,
169            the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
170            and the remaining nz items give the column indices of each of
171            the 1s within the logical 2D matrix.  Each row's items within
172            the array are the column indices of the 1s within that row
173            of the 2D matrix.  PETSc developers may recognize that this is the
174            same format as that computed by the DMDASetBlockFills_Private()
175            function from a dense 2D matrix representation.
176 
177      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
178      can be represented in the dfill, ofill format
179 
180    Contributed by Philip C. Roth
181 
182 .seealso DMDASetBlockFills(), DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()
183 
184 @*/
185 PetscErrorCode  DMDASetBlockFillsSparse(DM da,const PetscInt *dfillsparse,const PetscInt *ofillsparse)
186 {
187   DM_DA          *dd = (DM_DA*)da->data;
188   PetscErrorCode ierr;
189 
190   PetscFunctionBegin;
191   /* save the given dfill and ofill information */
192   ierr = DMDASetBlockFillsSparse_Private(dfillsparse,dd->w,&dd->dfill);CHKERRQ(ierr);
193   ierr = DMDASetBlockFillsSparse_Private(ofillsparse,dd->w,&dd->ofill);CHKERRQ(ierr);
194 
195   /* count nonzeros in ofill columns */
196   ierr = DMDASetBlockFills_Private2(dd);CHKERRQ(ierr);
197 
198   PetscFunctionReturn(0);
199 }
200 
201 
202 PetscErrorCode  DMCreateColoring_DA(DM da,ISColoringType ctype,ISColoring *coloring)
203 {
204   PetscErrorCode   ierr;
205   PetscInt         dim,m,n,p,nc;
206   DMBoundaryType bx,by,bz;
207   MPI_Comm         comm;
208   PetscMPIInt      size;
209   PetscBool        isBAIJ;
210   DM_DA            *dd = (DM_DA*)da->data;
211 
212   PetscFunctionBegin;
213   /*
214                                   m
215           ------------------------------------------------------
216          |                                                     |
217          |                                                     |
218          |               ----------------------                |
219          |               |                    |                |
220       n  |           yn  |                    |                |
221          |               |                    |                |
222          |               .---------------------                |
223          |             (xs,ys)     xn                          |
224          |            .                                        |
225          |         (gxs,gys)                                   |
226          |                                                     |
227           -----------------------------------------------------
228   */
229 
230   /*
231          nc - number of components per grid point
232          col - number of colors needed in one direction for single component problem
233 
234   */
235   ierr = DMDAGetInfo(da,&dim,0,0,0,&m,&n,&p,&nc,0,&bx,&by,&bz,0);CHKERRQ(ierr);
236 
237   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
238   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
239   if (ctype == IS_COLORING_LOCAL) {
240     if (size == 1) {
241       ctype = IS_COLORING_GLOBAL;
242     } else if (dim > 1) {
243       if ((m==1 && bx == DM_BOUNDARY_PERIODIC) || (n==1 && by == DM_BOUNDARY_PERIODIC) || (p==1 && bz == DM_BOUNDARY_PERIODIC)) {
244         SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain  on the same process");
245       }
246     }
247   }
248 
249   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
250      matrices is for the blocks, not the individual matrix elements  */
251   ierr = PetscStrcmp(da->mattype,MATBAIJ,&isBAIJ);CHKERRQ(ierr);
252   if (!isBAIJ) {ierr = PetscStrcmp(da->mattype,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);}
253   if (!isBAIJ) {ierr = PetscStrcmp(da->mattype,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);}
254   if (isBAIJ) {
255     dd->w  = 1;
256     dd->xs = dd->xs/nc;
257     dd->xe = dd->xe/nc;
258     dd->Xs = dd->Xs/nc;
259     dd->Xe = dd->Xe/nc;
260   }
261 
262   /*
263      We do not provide a getcoloring function in the DMDA operations because
264    the basic DMDA does not know about matrices. We think of DMDA as being more
265    more low-level then matrices.
266   */
267   if (dim == 1) {
268     ierr = DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
269   } else if (dim == 2) {
270     ierr =  DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
271   } else if (dim == 3) {
272     ierr =  DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
273   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
274   if (isBAIJ) {
275     dd->w  = nc;
276     dd->xs = dd->xs*nc;
277     dd->xe = dd->xe*nc;
278     dd->Xs = dd->Xs*nc;
279     dd->Xe = dd->Xe*nc;
280   }
281   PetscFunctionReturn(0);
282 }
283 
284 /* ---------------------------------------------------------------------------------*/
285 
286 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
287 {
288   PetscErrorCode   ierr;
289   PetscInt         xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
290   PetscInt         ncolors;
291   MPI_Comm         comm;
292   DMBoundaryType bx,by;
293   DMDAStencilType  st;
294   ISColoringValue  *colors;
295   DM_DA            *dd = (DM_DA*)da->data;
296 
297   PetscFunctionBegin;
298   /*
299          nc - number of components per grid point
300          col - number of colors needed in one direction for single component problem
301 
302   */
303   ierr = DMDAGetInfo(da,&dim,&m,&n,0,&M,&N,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
304   col  = 2*s + 1;
305   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
306   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
307   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
308 
309   /* special case as taught to us by Paul Hovland */
310   if (st == DMDA_STENCIL_STAR && s == 1) {
311     ierr = DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
312   } else {
313 
314     if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X (%d) is divisible\n\
315                                                             by 2*stencil_width + 1 (%d)\n", m, col);
316     if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y (%d) is divisible\n\
317                                                             by 2*stencil_width + 1 (%d)\n", n, col);
318     if (ctype == IS_COLORING_GLOBAL) {
319       if (!dd->localcoloring) {
320         ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr);
321         ii   = 0;
322         for (j=ys; j<ys+ny; j++) {
323           for (i=xs; i<xs+nx; i++) {
324             for (k=0; k<nc; k++) {
325               colors[ii++] = k + nc*((i % col) + col*(j % col));
326             }
327           }
328         }
329         ncolors = nc + nc*(col-1 + col*(col-1));
330         ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
331       }
332       *coloring = dd->localcoloring;
333     } else if (ctype == IS_COLORING_LOCAL) {
334       if (!dd->ghostedcoloring) {
335         ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr);
336         ii   = 0;
337         for (j=gys; j<gys+gny; j++) {
338           for (i=gxs; i<gxs+gnx; i++) {
339             for (k=0; k<nc; k++) {
340               /* the complicated stuff is to handle periodic boundaries */
341               colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
342             }
343           }
344         }
345         ncolors = nc + nc*(col - 1 + col*(col-1));
346         ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
347         /* PetscIntView(ncolors,(PetscInt*)colors,0); */
348 
349         ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
350       }
351       *coloring = dd->ghostedcoloring;
352     } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
353   }
354   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
355   PetscFunctionReturn(0);
356 }
357 
358 /* ---------------------------------------------------------------------------------*/
359 
360 PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
361 {
362   PetscErrorCode   ierr;
363   PetscInt         xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
364   PetscInt         ncolors;
365   MPI_Comm         comm;
366   DMBoundaryType bx,by,bz;
367   DMDAStencilType  st;
368   ISColoringValue  *colors;
369   DM_DA            *dd = (DM_DA*)da->data;
370 
371   PetscFunctionBegin;
372   /*
373          nc - number of components per grid point
374          col - number of colors needed in one direction for single component problem
375 
376   */
377   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
378   col  = 2*s + 1;
379   if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
380                                                          by 2*stencil_width + 1\n");
381   if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
382                                                          by 2*stencil_width + 1\n");
383   if (bz == DM_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
384                                                          by 2*stencil_width + 1\n");
385 
386   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
387   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
388   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
389 
390   /* create the coloring */
391   if (ctype == IS_COLORING_GLOBAL) {
392     if (!dd->localcoloring) {
393       ierr = PetscMalloc1(nc*nx*ny*nz,&colors);CHKERRQ(ierr);
394       ii   = 0;
395       for (k=zs; k<zs+nz; k++) {
396         for (j=ys; j<ys+ny; j++) {
397           for (i=xs; i<xs+nx; i++) {
398             for (l=0; l<nc; l++) {
399               colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
400             }
401           }
402         }
403       }
404       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
405       ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
406     }
407     *coloring = dd->localcoloring;
408   } else if (ctype == IS_COLORING_LOCAL) {
409     if (!dd->ghostedcoloring) {
410       ierr = PetscMalloc1(nc*gnx*gny*gnz,&colors);CHKERRQ(ierr);
411       ii   = 0;
412       for (k=gzs; k<gzs+gnz; k++) {
413         for (j=gys; j<gys+gny; j++) {
414           for (i=gxs; i<gxs+gnx; i++) {
415             for (l=0; l<nc; l++) {
416               /* the complicated stuff is to handle periodic boundaries */
417               colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
418             }
419           }
420         }
421       }
422       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
423       ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
424       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
425     }
426     *coloring = dd->ghostedcoloring;
427   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
428   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
429   PetscFunctionReturn(0);
430 }
431 
432 /* ---------------------------------------------------------------------------------*/
433 
434 PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
435 {
436   PetscErrorCode   ierr;
437   PetscInt         xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
438   PetscInt         ncolors;
439   MPI_Comm         comm;
440   DMBoundaryType bx;
441   ISColoringValue  *colors;
442   DM_DA            *dd = (DM_DA*)da->data;
443 
444   PetscFunctionBegin;
445   /*
446          nc - number of components per grid point
447          col - number of colors needed in one direction for single component problem
448 
449   */
450   ierr = DMDAGetInfo(da,&dim,&m,0,0,&M,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
451   col  = 2*s + 1;
452 
453   if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points %d is divisible\n\
454                                                           by 2*stencil_width + 1 %d\n",(int)m,(int)col);
455 
456   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
457   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
458   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
459 
460   /* create the coloring */
461   if (ctype == IS_COLORING_GLOBAL) {
462     if (!dd->localcoloring) {
463       ierr = PetscMalloc1(nc*nx,&colors);CHKERRQ(ierr);
464       if (dd->ofillcols) {
465         PetscInt tc = 0;
466         for (i=0; i<nc; i++) tc += (PetscInt) (dd->ofillcols[i] > 0);
467         i1 = 0;
468         for (i=xs; i<xs+nx; i++) {
469           for (l=0; l<nc; l++) {
470             if (dd->ofillcols[l] && (i % col)) {
471               colors[i1++] =  nc - 1 + tc*((i % col) - 1) + dd->ofillcols[l];
472             } else {
473               colors[i1++] = l;
474             }
475           }
476         }
477         ncolors = nc + 2*s*tc;
478       } else {
479         i1 = 0;
480         for (i=xs; i<xs+nx; i++) {
481           for (l=0; l<nc; l++) {
482             colors[i1++] = l + nc*(i % col);
483           }
484         }
485         ncolors = nc + nc*(col-1);
486       }
487       ierr = ISColoringCreate(comm,ncolors,nc*nx,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
488     }
489     *coloring = dd->localcoloring;
490   } else if (ctype == IS_COLORING_LOCAL) {
491     if (!dd->ghostedcoloring) {
492       ierr = PetscMalloc1(nc*gnx,&colors);CHKERRQ(ierr);
493       i1   = 0;
494       for (i=gxs; i<gxs+gnx; i++) {
495         for (l=0; l<nc; l++) {
496           /* the complicated stuff is to handle periodic boundaries */
497           colors[i1++] = l + nc*(SetInRange(i,m) % col);
498         }
499       }
500       ncolors = nc + nc*(col-1);
501       ierr    = ISColoringCreate(comm,ncolors,nc*gnx,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
502       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
503     }
504     *coloring = dd->ghostedcoloring;
505   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
506   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
507   PetscFunctionReturn(0);
508 }
509 
510 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
511 {
512   PetscErrorCode   ierr;
513   PetscInt         xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
514   PetscInt         ncolors;
515   MPI_Comm         comm;
516   DMBoundaryType bx,by;
517   ISColoringValue  *colors;
518   DM_DA            *dd = (DM_DA*)da->data;
519 
520   PetscFunctionBegin;
521   /*
522          nc - number of components per grid point
523          col - number of colors needed in one direction for single component problem
524 
525   */
526   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,0);CHKERRQ(ierr);
527   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
528   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
529   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
530 
531   if (bx == DM_BOUNDARY_PERIODIC && (m % 5)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible by 5\n");
532   if (by == DM_BOUNDARY_PERIODIC && (n % 5)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible by 5\n");
533 
534   /* create the coloring */
535   if (ctype == IS_COLORING_GLOBAL) {
536     if (!dd->localcoloring) {
537       ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr);
538       ii   = 0;
539       for (j=ys; j<ys+ny; j++) {
540         for (i=xs; i<xs+nx; i++) {
541           for (k=0; k<nc; k++) {
542             colors[ii++] = k + nc*((3*j+i) % 5);
543           }
544         }
545       }
546       ncolors = 5*nc;
547       ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
548     }
549     *coloring = dd->localcoloring;
550   } else if (ctype == IS_COLORING_LOCAL) {
551     if (!dd->ghostedcoloring) {
552       ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr);
553       ii = 0;
554       for (j=gys; j<gys+gny; j++) {
555         for (i=gxs; i<gxs+gnx; i++) {
556           for (k=0; k<nc; k++) {
557             colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
558           }
559         }
560       }
561       ncolors = 5*nc;
562       ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
563       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
564     }
565     *coloring = dd->ghostedcoloring;
566   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
567   PetscFunctionReturn(0);
568 }
569 
570 /* =========================================================================== */
571 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat);
572 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM,Mat);
573 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat);
574 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat);
575 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat);
576 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat);
577 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat);
578 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat);
579 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat);
580 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat);
581 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM,Mat);
582 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM,Mat);
583 extern PetscErrorCode DMCreateMatrix_DA_IS(DM,Mat);
584 
585 /*@C
586    MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix
587 
588    Logically Collective on mat
589 
590    Input Parameters:
591 +  mat - the matrix
592 -  da - the da
593 
594    Level: intermediate
595 
596 @*/
597 PetscErrorCode MatSetupDM(Mat mat,DM da)
598 {
599   PetscErrorCode ierr;
600 
601   PetscFunctionBegin;
602   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
603   PetscValidHeaderSpecificType(da,DM_CLASSID,1,DMDA);
604   ierr = PetscTryMethod(mat,"MatSetupDM_C",(Mat,DM),(mat,da));CHKERRQ(ierr);
605   PetscFunctionReturn(0);
606 }
607 
608 PetscErrorCode  MatView_MPI_DA(Mat A,PetscViewer viewer)
609 {
610   DM                da;
611   PetscErrorCode    ierr;
612   const char        *prefix;
613   Mat               Anatural;
614   AO                ao;
615   PetscInt          rstart,rend,*petsc,i;
616   IS                is;
617   MPI_Comm          comm;
618   PetscViewerFormat format;
619 
620   PetscFunctionBegin;
621   /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
622   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
623   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0);
624 
625   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
626   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
627   if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
628 
629   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
630   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
631   ierr = PetscMalloc1(rend-rstart,&petsc);CHKERRQ(ierr);
632   for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
633   ierr = AOApplicationToPetsc(ao,rend-rstart,petsc);CHKERRQ(ierr);
634   ierr = ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
635 
636   /* call viewer on natural ordering */
637   ierr = MatCreateSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);CHKERRQ(ierr);
638   ierr = ISDestroy(&is);CHKERRQ(ierr);
639   ierr = PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);CHKERRQ(ierr);
640   ierr = PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);CHKERRQ(ierr);
641   ierr = PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);CHKERRQ(ierr);
642   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
643   ierr = MatView(Anatural,viewer);CHKERRQ(ierr);
644   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
645   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
646   PetscFunctionReturn(0);
647 }
648 
649 PetscErrorCode  MatLoad_MPI_DA(Mat A,PetscViewer viewer)
650 {
651   DM             da;
652   PetscErrorCode ierr;
653   Mat            Anatural,Aapp;
654   AO             ao;
655   PetscInt       rstart,rend,*app,i,m,n,M,N;
656   IS             is;
657   MPI_Comm       comm;
658 
659   PetscFunctionBegin;
660   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
661   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
662   if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
663 
664   /* Load the matrix in natural ordering */
665   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Anatural);CHKERRQ(ierr);
666   ierr = MatSetType(Anatural,((PetscObject)A)->type_name);CHKERRQ(ierr);
667   ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr);
668   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
669   ierr = MatSetSizes(Anatural,m,n,M,N);CHKERRQ(ierr);
670   ierr = MatLoad(Anatural,viewer);CHKERRQ(ierr);
671 
672   /* Map natural ordering to application ordering and create IS */
673   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
674   ierr = MatGetOwnershipRange(Anatural,&rstart,&rend);CHKERRQ(ierr);
675   ierr = PetscMalloc1(rend-rstart,&app);CHKERRQ(ierr);
676   for (i=rstart; i<rend; i++) app[i-rstart] = i;
677   ierr = AOPetscToApplication(ao,rend-rstart,app);CHKERRQ(ierr);
678   ierr = ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
679 
680   /* Do permutation and replace header */
681   ierr = MatCreateSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);CHKERRQ(ierr);
682   ierr = MatHeaderReplace(A,&Aapp);CHKERRQ(ierr);
683   ierr = ISDestroy(&is);CHKERRQ(ierr);
684   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
685   PetscFunctionReturn(0);
686 }
687 
688 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
689 {
690   PetscErrorCode ierr;
691   PetscInt       dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
692   Mat            A;
693   MPI_Comm       comm;
694   MatType        Atype;
695   void           (*aij)(void)=NULL,(*baij)(void)=NULL,(*sbaij)(void)=NULL,(*sell)(void)=NULL,(*is)(void)=NULL;
696   MatType        mtype;
697   PetscMPIInt    size;
698   DM_DA          *dd = (DM_DA*)da->data;
699 
700   PetscFunctionBegin;
701   ierr = MatInitializePackage();CHKERRQ(ierr);
702   mtype = da->mattype;
703 
704   /*
705                                   m
706           ------------------------------------------------------
707          |                                                     |
708          |                                                     |
709          |               ----------------------                |
710          |               |                    |                |
711       n  |           ny  |                    |                |
712          |               |                    |                |
713          |               .---------------------                |
714          |             (xs,ys)     nx                          |
715          |            .                                        |
716          |         (gxs,gys)                                   |
717          |                                                     |
718           -----------------------------------------------------
719   */
720 
721   /*
722          nc - number of components per grid point
723          col - number of colors needed in one direction for single component problem
724 
725   */
726   M   = dd->M;
727   N   = dd->N;
728   P   = dd->P;
729   dim = da->dim;
730   dof = dd->w;
731   /* ierr = DMDAGetInfo(da,&dim,&M,&N,&P,0,0,0,&dof,0,0,0,0,0);CHKERRQ(ierr); */
732   ierr = DMDAGetCorners(da,0,0,0,&nx,&ny,&nz);CHKERRQ(ierr);
733   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
734   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
735   ierr = MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);CHKERRQ(ierr);
736   ierr = MatSetType(A,mtype);CHKERRQ(ierr);
737   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
738   ierr = MatSetDM(A,da);CHKERRQ(ierr);
739   if (da->structure_only) {
740     ierr = MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE);CHKERRQ(ierr);
741   }
742   ierr = MatGetType(A,&Atype);CHKERRQ(ierr);
743   /*
744      We do not provide a getmatrix function in the DMDA operations because
745    the basic DMDA does not know about matrices. We think of DMDA as being more
746    more low-level than matrices. This is kind of cheating but, cause sometimes
747    we think of DMDA has higher level than matrices.
748 
749      We could switch based on Atype (or mtype), but we do not since the
750    specialized setting routines depend only on the particular preallocation
751    details of the matrix, not the type itself.
752   */
753   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
754   if (!aij) {
755     ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
756   }
757   if (!aij) {
758     ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
759     if (!baij) {
760       ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
761     }
762     if (!baij) {
763       ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
764       if (!sbaij) {
765         ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
766       }
767       if (!sbaij) {
768         ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISELLSetPreallocation_C",&sell);CHKERRQ(ierr);
769         if (!sell) {
770           ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",&sell);CHKERRQ(ierr);
771         }
772       }
773       if (!sell) {
774         ierr = PetscObjectQueryFunction((PetscObject)A,"MatISSetPreallocation_C",&is);CHKERRQ(ierr);
775       }
776     }
777   }
778   if (aij) {
779     if (dim == 1) {
780       if (dd->ofill) {
781         ierr = DMCreateMatrix_DA_1d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
782       } else {
783         ierr = DMCreateMatrix_DA_1d_MPIAIJ(da,A);CHKERRQ(ierr);
784       }
785     } else if (dim == 2) {
786       if (dd->ofill) {
787         ierr = DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
788       } else {
789         ierr = DMCreateMatrix_DA_2d_MPIAIJ(da,A);CHKERRQ(ierr);
790       }
791     } else if (dim == 3) {
792       if (dd->ofill) {
793         ierr = DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
794       } else {
795         ierr = DMCreateMatrix_DA_3d_MPIAIJ(da,A);CHKERRQ(ierr);
796       }
797     }
798   } else if (baij) {
799     if (dim == 2) {
800       ierr = DMCreateMatrix_DA_2d_MPIBAIJ(da,A);CHKERRQ(ierr);
801     } else if (dim == 3) {
802       ierr = DMCreateMatrix_DA_3d_MPIBAIJ(da,A);CHKERRQ(ierr);
803     } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
804   } else if (sbaij) {
805     if (dim == 2) {
806       ierr = DMCreateMatrix_DA_2d_MPISBAIJ(da,A);CHKERRQ(ierr);
807     } else if (dim == 3) {
808       ierr = DMCreateMatrix_DA_3d_MPISBAIJ(da,A);CHKERRQ(ierr);
809     } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
810   } else if (sell) {
811      if (dim == 2) {
812        ierr = DMCreateMatrix_DA_2d_MPISELL(da,A);CHKERRQ(ierr);
813      } else if (dim == 3) {
814        ierr = DMCreateMatrix_DA_3d_MPISELL(da,A);CHKERRQ(ierr);
815      } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
816   } else if (is) {
817     ierr = DMCreateMatrix_DA_IS(da,A);CHKERRQ(ierr);
818   } else {
819     ISLocalToGlobalMapping ltog;
820 
821     ierr = MatSetBlockSize(A,dof);CHKERRQ(ierr);
822     ierr = MatSetUp(A);CHKERRQ(ierr);
823     ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
824     ierr = MatSetLocalToGlobalMapping(A,ltog,ltog);CHKERRQ(ierr);
825   }
826   ierr = DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);CHKERRQ(ierr);
827   ierr = MatSetStencil(A,dim,dims,starts,dof);CHKERRQ(ierr);
828   ierr = MatSetDM(A,da);CHKERRQ(ierr);
829   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
830   if (size > 1) {
831     /* change viewer to display matrix in natural ordering */
832     ierr = MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA);CHKERRQ(ierr);
833     ierr = MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA);CHKERRQ(ierr);
834   }
835   *J = A;
836   PetscFunctionReturn(0);
837 }
838 
839 /* ---------------------------------------------------------------------------------*/
840 PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
841 
842 PetscErrorCode DMCreateMatrix_DA_IS(DM dm,Mat J)
843 {
844   DM_DA                  *da = (DM_DA*)dm->data;
845   Mat                    lJ;
846   ISLocalToGlobalMapping ltog;
847   IS                     is_loc_filt, is_glob;
848   const PetscInt         *e_loc,*idx;
849   PetscInt               nel,nen,nv,dof,dim,*gidx,nb;
850   PetscBool              flg;
851   PetscErrorCode         ierr;
852 
853   /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
854      We need to filter the local indices that are represented through the DMDAGetElements decomposition
855      This is because the size of the local matrices in MATIS is the local size of the l2g map */
856   PetscFunctionBegin;
857   dof  = da->w;
858   dim  = dm->dim;
859 
860   ierr = MatSetBlockSize(J,dof);CHKERRQ(ierr);
861 
862   /* get local elements indices in local DMDA numbering */
863   ierr = DMDAGetElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
864   ierr = ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nel*nen,e_loc,PETSC_COPY_VALUES,&is_loc_filt);CHKERRQ(ierr);
865   ierr = DMDARestoreElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr);
866 
867   /* obtain a consistent local ordering for MATIS */
868   ierr = ISSortRemoveDups(is_loc_filt);CHKERRQ(ierr);
869   ierr = ISBlockGetLocalSize(is_loc_filt,&nb);CHKERRQ(ierr);
870   ierr = DMGetLocalToGlobalMapping(dm,&ltog);CHKERRQ(ierr);
871   ierr = ISLocalToGlobalMappingGetSize(ltog,&nv);CHKERRQ(ierr);
872   ierr = PetscMalloc1(PetscMax(nb,nv/dof),&gidx);CHKERRQ(ierr);
873   ierr = ISBlockGetIndices(is_loc_filt,&idx);CHKERRQ(ierr);
874   ierr = ISLocalToGlobalMappingApplyBlock(ltog,nb,idx,gidx);CHKERRQ(ierr);
875   ierr = ISBlockRestoreIndices(is_loc_filt,&idx);CHKERRQ(ierr);
876   ierr = ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nb,gidx,PETSC_USE_POINTER,&is_glob);CHKERRQ(ierr);
877   ierr = ISLocalToGlobalMappingCreateIS(is_glob,&ltog);CHKERRQ(ierr);
878   ierr = ISDestroy(&is_glob);CHKERRQ(ierr);
879   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
880   ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
881 
882   /* We also attach a l2g map to the local matrices to have MatSetValueLocal to work */
883   ierr = MatISGetLocalMat(J,&lJ);CHKERRQ(ierr);
884   ierr = ISLocalToGlobalMappingCreateIS(is_loc_filt,&ltog);CHKERRQ(ierr);
885   ierr = ISDestroy(&is_loc_filt);CHKERRQ(ierr);
886   ierr = ISCreateStride(PetscObjectComm((PetscObject)lJ),nv/dof,0,1,&is_glob);CHKERRQ(ierr);
887   ierr = ISGetIndices(is_glob,&idx);CHKERRQ(ierr);
888   ierr = ISGlobalToLocalMappingApplyBlock(ltog,IS_GTOLM_MASK,nv/dof,idx,&nb,gidx);CHKERRQ(ierr);
889   ierr = ISRestoreIndices(is_glob,&idx);CHKERRQ(ierr);
890   ierr = ISDestroy(&is_glob);CHKERRQ(ierr);
891   ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
892   ierr = ISCreateBlock(PETSC_COMM_SELF,dof,nb,gidx,PETSC_USE_POINTER,&is_loc_filt);CHKERRQ(ierr);
893   ierr = ISLocalToGlobalMappingCreateIS(is_loc_filt,&ltog);CHKERRQ(ierr);
894   ierr = ISDestroy(&is_loc_filt);CHKERRQ(ierr);
895   ierr = MatSetLocalToGlobalMapping(lJ,ltog,ltog);CHKERRQ(ierr);
896   ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
897   ierr = PetscFree(gidx);CHKERRQ(ierr);
898 
899   /* Preallocation (not exact): we reuse the preallocation routines of the assembled version  */
900   flg = dm->prealloc_only;
901   dm->prealloc_only = PETSC_TRUE;
902   switch (dim) {
903   case 1:
904     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr);
905     ierr = DMCreateMatrix_DA_1d_MPIAIJ(dm,J);CHKERRQ(ierr);
906     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
907     break;
908   case 2:
909     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr);
910     ierr = DMCreateMatrix_DA_2d_MPIAIJ(dm,J);CHKERRQ(ierr);
911     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
912     break;
913   case 3:
914     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr);
915     ierr = DMCreateMatrix_DA_3d_MPIAIJ(dm,J);CHKERRQ(ierr);
916     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
917     break;
918   default:
919     SETERRQ1(PetscObjectComm((PetscObject)dm),PETSC_ERR_SUP,"Unhandled dimension %d",dim);
920     break;
921   }
922   dm->prealloc_only = flg;
923   PetscFunctionReturn(0);
924 }
925 
926 PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da,Mat J)
927 {
928   PetscErrorCode         ierr;
929   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p;
930   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
931   MPI_Comm               comm;
932   PetscScalar            *values;
933   DMBoundaryType         bx,by;
934   ISLocalToGlobalMapping ltog;
935   DMDAStencilType        st;
936 
937   PetscFunctionBegin;
938   /*
939          nc - number of components per grid point
940          col - number of colors needed in one direction for single component problem
941 
942   */
943   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
944   col  = 2*s + 1;
945   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
946   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
947   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
948 
949   ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr);
950   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
951 
952   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
953   /* determine the matrix preallocation information */
954   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
955   for (i=xs; i<xs+nx; i++) {
956 
957     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
958     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
959 
960     for (j=ys; j<ys+ny; j++) {
961       slot = i - gxs + gnx*(j - gys);
962 
963       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
964       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
965 
966       cnt = 0;
967       for (k=0; k<nc; k++) {
968         for (l=lstart; l<lend+1; l++) {
969           for (p=pstart; p<pend+1; p++) {
970             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
971               cols[cnt++] = k + nc*(slot + gnx*l + p);
972             }
973           }
974         }
975         rows[k] = k + nc*(slot);
976       }
977       ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
978     }
979   }
980   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
981   ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr);
982   ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
983   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
984 
985   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
986 
987   /*
988     For each node in the grid: we get the neighbors in the local (on processor ordering
989     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
990     PETSc ordering.
991   */
992   if (!da->prealloc_only) {
993     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
994     for (i=xs; i<xs+nx; i++) {
995 
996       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
997       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
998 
999       for (j=ys; j<ys+ny; j++) {
1000         slot = i - gxs + gnx*(j - gys);
1001 
1002         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1003         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1004 
1005         cnt = 0;
1006         for (k=0; k<nc; k++) {
1007           for (l=lstart; l<lend+1; l++) {
1008             for (p=pstart; p<pend+1; p++) {
1009               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1010                 cols[cnt++] = k + nc*(slot + gnx*l + p);
1011               }
1012             }
1013           }
1014           rows[k] = k + nc*(slot);
1015         }
1016         ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1017       }
1018     }
1019     ierr = PetscFree(values);CHKERRQ(ierr);
1020     /* do not copy values to GPU since they are all zero and not yet needed there */
1021     ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1022     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1023     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1024     ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1025     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1026   }
1027   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1028   PetscFunctionReturn(0);
1029 }
1030 
1031 PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da,Mat J)
1032 {
1033   PetscErrorCode         ierr;
1034   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1035   PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
1036   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
1037   MPI_Comm               comm;
1038   PetscScalar            *values;
1039   DMBoundaryType         bx,by,bz;
1040   ISLocalToGlobalMapping ltog;
1041   DMDAStencilType        st;
1042 
1043   PetscFunctionBegin;
1044   /*
1045          nc - number of components per grid point
1046          col - number of colors needed in one direction for single component problem
1047 
1048   */
1049   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1050   col  = 2*s + 1;
1051   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1052   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1053   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1054 
1055   ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr);
1056   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1057 
1058   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1059   /* determine the matrix preallocation information */
1060   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1061   for (i=xs; i<xs+nx; i++) {
1062     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1063     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1064     for (j=ys; j<ys+ny; j++) {
1065       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1066       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1067       for (k=zs; k<zs+nz; k++) {
1068         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1069         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1070 
1071         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1072 
1073         cnt = 0;
1074         for (l=0; l<nc; l++) {
1075           for (ii=istart; ii<iend+1; ii++) {
1076             for (jj=jstart; jj<jend+1; jj++) {
1077               for (kk=kstart; kk<kend+1; kk++) {
1078                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1079                   cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1080                 }
1081               }
1082             }
1083           }
1084           rows[l] = l + nc*(slot);
1085         }
1086         ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1087       }
1088     }
1089   }
1090   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1091   ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1092   ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1093   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1094   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1095 
1096   /*
1097     For each node in the grid: we get the neighbors in the local (on processor ordering
1098     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1099     PETSc ordering.
1100   */
1101   if (!da->prealloc_only) {
1102     ierr = PetscCalloc1(col*col*col*nc*nc*nc,&values);CHKERRQ(ierr);
1103     for (i=xs; i<xs+nx; i++) {
1104       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1105       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1106       for (j=ys; j<ys+ny; j++) {
1107         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1108         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1109         for (k=zs; k<zs+nz; k++) {
1110           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1111           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1112 
1113           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1114 
1115           cnt = 0;
1116           for (l=0; l<nc; l++) {
1117             for (ii=istart; ii<iend+1; ii++) {
1118               for (jj=jstart; jj<jend+1; jj++) {
1119                 for (kk=kstart; kk<kend+1; kk++) {
1120                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1121                     cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1122                   }
1123                 }
1124               }
1125             }
1126             rows[l] = l + nc*(slot);
1127           }
1128           ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1129         }
1130       }
1131     }
1132     ierr = PetscFree(values);CHKERRQ(ierr);
1133     /* do not copy values to GPU since they are all zero and not yet needed there */
1134     ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1135     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1136     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1137     ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1138     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1139   }
1140   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1141   PetscFunctionReturn(0);
1142 }
1143 
1144 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J)
1145 {
1146   PetscErrorCode         ierr;
1147   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,M,N;
1148   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
1149   MPI_Comm               comm;
1150   PetscScalar            *values;
1151   DMBoundaryType         bx,by;
1152   ISLocalToGlobalMapping ltog,mltog;
1153   DMDAStencilType        st;
1154   PetscBool              removedups = PETSC_FALSE;
1155 
1156   PetscFunctionBegin;
1157   /*
1158          nc - number of components per grid point
1159          col - number of colors needed in one direction for single component problem
1160 
1161   */
1162   ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1163   col  = 2*s + 1;
1164   /*
1165        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1166        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1167   */
1168   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1169   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1170   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1171   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1172   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1173 
1174   ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr);
1175   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1176 
1177   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1178   /* determine the matrix preallocation information */
1179   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
1180   for (i=xs; i<xs+nx; i++) {
1181 
1182     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1183     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1184 
1185     for (j=ys; j<ys+ny; j++) {
1186       slot = i - gxs + gnx*(j - gys);
1187 
1188       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1189       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1190 
1191       cnt = 0;
1192       for (k=0; k<nc; k++) {
1193         for (l=lstart; l<lend+1; l++) {
1194           for (p=pstart; p<pend+1; p++) {
1195             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1196               cols[cnt++] = k + nc*(slot + gnx*l + p);
1197             }
1198           }
1199         }
1200         rows[k] = k + nc*(slot);
1201       }
1202       if (removedups) {
1203         ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1204       } else {
1205         ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1206       }
1207     }
1208   }
1209   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1210   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1211   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1212   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1213   ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
1214   if (!mltog) {
1215     ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1216   }
1217 
1218   /*
1219     For each node in the grid: we get the neighbors in the local (on processor ordering
1220     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1221     PETSc ordering.
1222   */
1223   if (!da->prealloc_only) {
1224     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
1225     for (i=xs; i<xs+nx; i++) {
1226 
1227       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1228       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1229 
1230       for (j=ys; j<ys+ny; j++) {
1231         slot = i - gxs + gnx*(j - gys);
1232 
1233         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1234         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1235 
1236         cnt = 0;
1237         for (k=0; k<nc; k++) {
1238           for (l=lstart; l<lend+1; l++) {
1239             for (p=pstart; p<pend+1; p++) {
1240               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1241                 cols[cnt++] = k + nc*(slot + gnx*l + p);
1242               }
1243             }
1244           }
1245           rows[k] = k + nc*(slot);
1246         }
1247         ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1248       }
1249     }
1250     ierr = PetscFree(values);CHKERRQ(ierr);
1251     /* do not copy values to GPU since they are all zero and not yet needed there */
1252     ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1253     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1254     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1255     ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1256     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1257   }
1258   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1259   PetscFunctionReturn(0);
1260 }
1261 
1262 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)
1263 {
1264   PetscErrorCode         ierr;
1265   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1266   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,maxcnt = 0,l,p,M,N;
1267   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
1268   DM_DA                  *dd = (DM_DA*)da->data;
1269   PetscInt               ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
1270   MPI_Comm               comm;
1271   PetscScalar            *values;
1272   DMBoundaryType         bx,by;
1273   ISLocalToGlobalMapping ltog;
1274   DMDAStencilType        st;
1275   PetscBool              removedups = PETSC_FALSE;
1276 
1277   PetscFunctionBegin;
1278   /*
1279          nc - number of components per grid point
1280          col - number of colors needed in one direction for single component problem
1281 
1282   */
1283   ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1284   col  = 2*s + 1;
1285   /*
1286        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1287        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1288   */
1289   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1290   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1291   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1292   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1293   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1294 
1295   ierr = PetscMalloc1(col*col*nc,&cols);CHKERRQ(ierr);
1296   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1297 
1298   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1299   /* determine the matrix preallocation information */
1300   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
1301   for (i=xs; i<xs+nx; i++) {
1302 
1303     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1304     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1305 
1306     for (j=ys; j<ys+ny; j++) {
1307       slot = i - gxs + gnx*(j - gys);
1308 
1309       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1310       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1311 
1312       for (k=0; k<nc; k++) {
1313         cnt = 0;
1314         for (l=lstart; l<lend+1; l++) {
1315           for (p=pstart; p<pend+1; p++) {
1316             if (l || p) {
1317               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
1318                 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
1319               }
1320             } else {
1321               if (dfill) {
1322                 for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1323               } else {
1324                 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1325               }
1326             }
1327           }
1328         }
1329         row    = k + nc*(slot);
1330         maxcnt = PetscMax(maxcnt,cnt);
1331         if (removedups) {
1332           ierr   = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1333         } else {
1334           ierr   = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1335         }
1336       }
1337     }
1338   }
1339   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1340   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1341   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1342   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1343 
1344   /*
1345     For each node in the grid: we get the neighbors in the local (on processor ordering
1346     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1347     PETSc ordering.
1348   */
1349   if (!da->prealloc_only) {
1350     ierr = PetscCalloc1(maxcnt,&values);CHKERRQ(ierr);
1351     for (i=xs; i<xs+nx; i++) {
1352 
1353       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1354       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1355 
1356       for (j=ys; j<ys+ny; j++) {
1357         slot = i - gxs + gnx*(j - gys);
1358 
1359         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1360         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1361 
1362         for (k=0; k<nc; k++) {
1363           cnt = 0;
1364           for (l=lstart; l<lend+1; l++) {
1365             for (p=pstart; p<pend+1; p++) {
1366               if (l || p) {
1367                 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
1368                   for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
1369                 }
1370               } else {
1371                 if (dfill) {
1372                   for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1373                 } else {
1374                   for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1375                 }
1376               }
1377             }
1378           }
1379           row  = k + nc*(slot);
1380           ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1381         }
1382       }
1383     }
1384     ierr = PetscFree(values);CHKERRQ(ierr);
1385     /* do not copy values to GPU since they are all zero and not yet needed there */
1386     ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1387     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1388     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1389     ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1390     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1391   }
1392   ierr = PetscFree(cols);CHKERRQ(ierr);
1393   PetscFunctionReturn(0);
1394 }
1395 
1396 /* ---------------------------------------------------------------------------------*/
1397 
1398 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J)
1399 {
1400   PetscErrorCode         ierr;
1401   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1402   PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
1403   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
1404   MPI_Comm               comm;
1405   PetscScalar            *values;
1406   DMBoundaryType         bx,by,bz;
1407   ISLocalToGlobalMapping ltog,mltog;
1408   DMDAStencilType        st;
1409   PetscBool              removedups = PETSC_FALSE;
1410 
1411   PetscFunctionBegin;
1412   /*
1413          nc - number of components per grid point
1414          col - number of colors needed in one direction for single component problem
1415 
1416   */
1417   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1418   col  = 2*s + 1;
1419 
1420   /*
1421        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1422        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1423   */
1424   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1425   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1426   if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;
1427 
1428   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1429   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1430   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1431 
1432   ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr);
1433   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1434 
1435   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1436   /* determine the matrix preallocation information */
1437   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1438   for (i=xs; i<xs+nx; i++) {
1439     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1440     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1441     for (j=ys; j<ys+ny; j++) {
1442       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1443       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1444       for (k=zs; k<zs+nz; k++) {
1445         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1446         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1447 
1448         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1449 
1450         cnt = 0;
1451         for (l=0; l<nc; l++) {
1452           for (ii=istart; ii<iend+1; ii++) {
1453             for (jj=jstart; jj<jend+1; jj++) {
1454               for (kk=kstart; kk<kend+1; kk++) {
1455                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1456                   cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1457                 }
1458               }
1459             }
1460           }
1461           rows[l] = l + nc*(slot);
1462         }
1463         if (removedups) {
1464           ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1465         } else {
1466           ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1467         }
1468       }
1469     }
1470   }
1471   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1472   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1473   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1474   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1475   ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
1476   if (!mltog) {
1477     ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1478   }
1479 
1480   /*
1481     For each node in the grid: we get the neighbors in the local (on processor ordering
1482     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1483     PETSc ordering.
1484   */
1485   if (!da->prealloc_only) {
1486     ierr = PetscCalloc1(col*col*col*nc*nc*nc,&values);CHKERRQ(ierr);
1487     for (i=xs; i<xs+nx; i++) {
1488       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1489       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1490       for (j=ys; j<ys+ny; j++) {
1491         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1492         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1493         for (k=zs; k<zs+nz; k++) {
1494           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1495           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1496 
1497           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1498 
1499           cnt = 0;
1500           for (l=0; l<nc; l++) {
1501             for (ii=istart; ii<iend+1; ii++) {
1502               for (jj=jstart; jj<jend+1; jj++) {
1503                 for (kk=kstart; kk<kend+1; kk++) {
1504                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1505                     cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1506                   }
1507                 }
1508               }
1509             }
1510             rows[l] = l + nc*(slot);
1511           }
1512           ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1513         }
1514       }
1515     }
1516     ierr = PetscFree(values);CHKERRQ(ierr);
1517     /* do not copy values to GPU since they are all zero and not yet needed there */
1518     ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1519     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1520     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1521     ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1522     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1523   }
1524   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1525   PetscFunctionReturn(0);
1526 }
1527 
1528 /* ---------------------------------------------------------------------------------*/
1529 
1530 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J)
1531 {
1532   PetscErrorCode         ierr;
1533   DM_DA                  *dd = (DM_DA*)da->data;
1534   PetscInt               xs,nx,i,j,gxs,gnx,row,k,l;
1535   PetscInt               m,dim,s,*cols = NULL,nc,cnt,maxcnt = 0,*ocols;
1536   PetscInt               *ofill = dd->ofill,*dfill = dd->dfill;
1537   PetscScalar            *values;
1538   DMBoundaryType         bx;
1539   ISLocalToGlobalMapping ltog;
1540   PetscMPIInt            rank,size;
1541 
1542   PetscFunctionBegin;
1543   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)da),&rank);CHKERRQ(ierr);
1544   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRQ(ierr);
1545 
1546   /*
1547          nc - number of components per grid point
1548 
1549   */
1550   ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
1551   if (s > 1) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Matrix creation for 1d not implemented correctly for stencil width larger than 1");
1552   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
1553   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
1554 
1555   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1556   ierr = PetscCalloc2(nx*nc,&cols,nx*nc,&ocols);CHKERRQ(ierr);
1557 
1558   /*
1559         note should be smaller for first and last process with no periodic
1560         does not handle dfill
1561   */
1562   cnt = 0;
1563   /* coupling with process to the left */
1564   for (i=0; i<s; i++) {
1565     for (j=0; j<nc; j++) {
1566       ocols[cnt] = ((!rank) ? 0 : (s - i)*(ofill[j+1] - ofill[j]));
1567       cols[cnt]  = dfill[j+1] - dfill[j] + (s + i)*(ofill[j+1] - ofill[j]);
1568       if (!rank && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1569         if (size > 1) ocols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
1570         else cols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
1571       }
1572       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1573       cnt++;
1574     }
1575   }
1576   for (i=s; i<nx-s; i++) {
1577     for (j=0; j<nc; j++) {
1578       cols[cnt] = dfill[j+1] - dfill[j] + 2*s*(ofill[j+1] - ofill[j]);
1579       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1580       cnt++;
1581     }
1582   }
1583   /* coupling with process to the right */
1584   for (i=nx-s; i<nx; i++) {
1585     for (j=0; j<nc; j++) {
1586       ocols[cnt] = ((rank == (size-1)) ? 0 : (i - nx + s + 1)*(ofill[j+1] - ofill[j]));
1587       cols[cnt]  = dfill[j+1] - dfill[j] + (s + nx - i - 1)*(ofill[j+1] - ofill[j]);
1588       if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1589         if (size > 1) ocols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
1590         else cols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
1591       }
1592       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1593       cnt++;
1594     }
1595   }
1596 
1597   ierr = MatSeqAIJSetPreallocation(J,0,cols);CHKERRQ(ierr);
1598   ierr = MatMPIAIJSetPreallocation(J,0,cols,0,ocols);CHKERRQ(ierr);
1599   ierr = PetscFree2(cols,ocols);CHKERRQ(ierr);
1600 
1601   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1602   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1603 
1604   /*
1605     For each node in the grid: we get the neighbors in the local (on processor ordering
1606     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1607     PETSc ordering.
1608   */
1609   if (!da->prealloc_only) {
1610     ierr = PetscCalloc2(maxcnt,&values,maxcnt,&cols);CHKERRQ(ierr);
1611 
1612     row = xs*nc;
1613     /* coupling with process to the left */
1614     for (i=xs; i<xs+s; i++) {
1615       for (j=0; j<nc; j++) {
1616         cnt = 0;
1617         if (rank) {
1618           for (l=0; l<s; l++) {
1619             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1620           }
1621         }
1622         if (!rank && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1623           for (l=0; l<s; l++) {
1624             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (m + i - s - l)*nc + ofill[k];
1625           }
1626         }
1627         if (dfill) {
1628           for (k=dfill[j]; k<dfill[j+1]; k++) {
1629             cols[cnt++] = i*nc + dfill[k];
1630           }
1631         } else {
1632           for (k=0; k<nc; k++) {
1633             cols[cnt++] = i*nc + k;
1634           }
1635         }
1636         for (l=0; l<s; l++) {
1637           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1638         }
1639         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1640         row++;
1641       }
1642     }
1643     for (i=xs+s; i<xs+nx-s; i++) {
1644       for (j=0; j<nc; j++) {
1645         cnt = 0;
1646         for (l=0; l<s; l++) {
1647           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1648         }
1649         if (dfill) {
1650           for (k=dfill[j]; k<dfill[j+1]; k++) {
1651             cols[cnt++] = i*nc + dfill[k];
1652           }
1653         } else {
1654           for (k=0; k<nc; k++) {
1655             cols[cnt++] = i*nc + k;
1656           }
1657         }
1658         for (l=0; l<s; l++) {
1659           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1660         }
1661         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1662         row++;
1663       }
1664     }
1665     /* coupling with process to the right */
1666     for (i=xs+nx-s; i<xs+nx; i++) {
1667       for (j=0; j<nc; j++) {
1668         cnt = 0;
1669         for (l=0; l<s; l++) {
1670           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1671         }
1672         if (dfill) {
1673           for (k=dfill[j]; k<dfill[j+1]; k++) {
1674             cols[cnt++] = i*nc + dfill[k];
1675           }
1676         } else {
1677           for (k=0; k<nc; k++) {
1678             cols[cnt++] = i*nc + k;
1679           }
1680         }
1681         if (rank < size-1) {
1682           for (l=0; l<s; l++) {
1683             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1684           }
1685         }
1686         if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1687           for (l=0; l<s; l++) {
1688             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s - l - m + 2)*nc + ofill[k];
1689           }
1690         }
1691         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1692         row++;
1693       }
1694     }
1695     ierr = PetscFree2(values,cols);CHKERRQ(ierr);
1696     /* do not copy values to GPU since they are all zero and not yet needed there */
1697     ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1698     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1699     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1700     ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1701     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1702   }
1703   PetscFunctionReturn(0);
1704 }
1705 
1706 /* ---------------------------------------------------------------------------------*/
1707 
1708 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J)
1709 {
1710   PetscErrorCode         ierr;
1711   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1712   PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
1713   PetscInt               istart,iend;
1714   PetscScalar            *values;
1715   DMBoundaryType         bx;
1716   ISLocalToGlobalMapping ltog,mltog;
1717 
1718   PetscFunctionBegin;
1719   /*
1720          nc - number of components per grid point
1721          col - number of colors needed in one direction for single component problem
1722 
1723   */
1724   ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
1725   col  = 2*s + 1;
1726 
1727   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
1728   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
1729 
1730   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1731   ierr = MatSeqAIJSetPreallocation(J,col*nc,0);CHKERRQ(ierr);
1732   ierr = MatMPIAIJSetPreallocation(J,col*nc,0,col*nc,0);CHKERRQ(ierr);
1733 
1734   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1735   ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
1736   if (!mltog) {
1737     ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1738   }
1739 
1740   /*
1741     For each node in the grid: we get the neighbors in the local (on processor ordering
1742     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1743     PETSc ordering.
1744   */
1745   if (!da->prealloc_only) {
1746     ierr = PetscMalloc2(nc,&rows,col*nc*nc,&cols);CHKERRQ(ierr);
1747     ierr = PetscCalloc1(col*nc*nc,&values);CHKERRQ(ierr);
1748     for (i=xs; i<xs+nx; i++) {
1749       istart = PetscMax(-s,gxs - i);
1750       iend   = PetscMin(s,gxs + gnx - i - 1);
1751       slot   = i - gxs;
1752 
1753       cnt = 0;
1754       for (l=0; l<nc; l++) {
1755         for (i1=istart; i1<iend+1; i1++) {
1756           cols[cnt++] = l + nc*(slot + i1);
1757         }
1758         rows[l] = l + nc*(slot);
1759       }
1760       ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1761     }
1762     ierr = PetscFree(values);CHKERRQ(ierr);
1763     /* do not copy values to GPU since they are all zero and not yet needed there */
1764     ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1765     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1766     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1767     ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1768     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1769     ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1770   }
1771   PetscFunctionReturn(0);
1772 }
1773 
1774 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
1775 {
1776   PetscErrorCode         ierr;
1777   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1778   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1779   PetscInt               istart,iend,jstart,jend,ii,jj;
1780   MPI_Comm               comm;
1781   PetscScalar            *values;
1782   DMBoundaryType         bx,by;
1783   DMDAStencilType        st;
1784   ISLocalToGlobalMapping ltog;
1785 
1786   PetscFunctionBegin;
1787   /*
1788      nc - number of components per grid point
1789      col - number of colors needed in one direction for single component problem
1790   */
1791   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1792   col  = 2*s + 1;
1793 
1794   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1795   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1796   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1797 
1798   ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr);
1799 
1800   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1801 
1802   /* determine the matrix preallocation information */
1803   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1804   for (i=xs; i<xs+nx; i++) {
1805     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1806     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1807     for (j=ys; j<ys+ny; j++) {
1808       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1809       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1810       slot   = i - gxs + gnx*(j - gys);
1811 
1812       /* Find block columns in block row */
1813       cnt = 0;
1814       for (ii=istart; ii<iend+1; ii++) {
1815         for (jj=jstart; jj<jend+1; jj++) {
1816           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1817             cols[cnt++] = slot + ii + gnx*jj;
1818           }
1819         }
1820       }
1821       ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1822     }
1823   }
1824   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1825   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1826   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1827 
1828   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1829 
1830   /*
1831     For each node in the grid: we get the neighbors in the local (on processor ordering
1832     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1833     PETSc ordering.
1834   */
1835   if (!da->prealloc_only) {
1836     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
1837     for (i=xs; i<xs+nx; i++) {
1838       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1839       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1840       for (j=ys; j<ys+ny; j++) {
1841         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1842         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1843         slot = i - gxs + gnx*(j - gys);
1844         cnt  = 0;
1845         for (ii=istart; ii<iend+1; ii++) {
1846           for (jj=jstart; jj<jend+1; jj++) {
1847             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1848               cols[cnt++] = slot + ii + gnx*jj;
1849             }
1850           }
1851         }
1852         ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1853       }
1854     }
1855     ierr = PetscFree(values);CHKERRQ(ierr);
1856     /* do not copy values to GPU since they are all zero and not yet needed there */
1857     ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1858     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1859     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1860     ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1861     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1862   }
1863   ierr = PetscFree(cols);CHKERRQ(ierr);
1864   PetscFunctionReturn(0);
1865 }
1866 
1867 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
1868 {
1869   PetscErrorCode         ierr;
1870   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1871   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1872   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1873   MPI_Comm               comm;
1874   PetscScalar            *values;
1875   DMBoundaryType         bx,by,bz;
1876   DMDAStencilType        st;
1877   ISLocalToGlobalMapping ltog;
1878 
1879   PetscFunctionBegin;
1880   /*
1881          nc - number of components per grid point
1882          col - number of colors needed in one direction for single component problem
1883 
1884   */
1885   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1886   col  = 2*s + 1;
1887 
1888   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1889   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1890   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1891 
1892   ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr);
1893 
1894   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1895 
1896   /* determine the matrix preallocation information */
1897   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1898   for (i=xs; i<xs+nx; i++) {
1899     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1900     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1901     for (j=ys; j<ys+ny; j++) {
1902       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1903       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1904       for (k=zs; k<zs+nz; k++) {
1905         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1906         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1907 
1908         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1909 
1910         /* Find block columns in block row */
1911         cnt = 0;
1912         for (ii=istart; ii<iend+1; ii++) {
1913           for (jj=jstart; jj<jend+1; jj++) {
1914             for (kk=kstart; kk<kend+1; kk++) {
1915               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1916                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1917               }
1918             }
1919           }
1920         }
1921         ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1922       }
1923     }
1924   }
1925   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1926   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1927   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1928 
1929   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1930 
1931   /*
1932     For each node in the grid: we get the neighbors in the local (on processor ordering
1933     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1934     PETSc ordering.
1935   */
1936   if (!da->prealloc_only) {
1937     ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr);
1938     for (i=xs; i<xs+nx; i++) {
1939       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1940       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1941       for (j=ys; j<ys+ny; j++) {
1942         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1943         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1944         for (k=zs; k<zs+nz; k++) {
1945           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1946           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1947 
1948           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1949 
1950           cnt = 0;
1951           for (ii=istart; ii<iend+1; ii++) {
1952             for (jj=jstart; jj<jend+1; jj++) {
1953               for (kk=kstart; kk<kend+1; kk++) {
1954                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1955                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1956                 }
1957               }
1958             }
1959           }
1960           ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1961         }
1962       }
1963     }
1964     ierr = PetscFree(values);CHKERRQ(ierr);
1965     /* do not copy values to GPU since they are all zero and not yet needed there */
1966     ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1967     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1968     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1969     ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1970     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1971   }
1972   ierr = PetscFree(cols);CHKERRQ(ierr);
1973   PetscFunctionReturn(0);
1974 }
1975 
1976 /*
1977   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
1978   identify in the local ordering with periodic domain.
1979 */
1980 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[])
1981 {
1982   PetscErrorCode ierr;
1983   PetscInt       i,n;
1984 
1985   PetscFunctionBegin;
1986   ierr = ISLocalToGlobalMappingApplyBlock(ltog,1,row,row);CHKERRQ(ierr);
1987   ierr = ISLocalToGlobalMappingApplyBlock(ltog,*cnt,col,col);CHKERRQ(ierr);
1988   for (i=0,n=0; i<*cnt; i++) {
1989     if (col[i] >= *row) col[n++] = col[i];
1990   }
1991   *cnt = n;
1992   PetscFunctionReturn(0);
1993 }
1994 
1995 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J)
1996 {
1997   PetscErrorCode         ierr;
1998   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1999   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
2000   PetscInt               istart,iend,jstart,jend,ii,jj;
2001   MPI_Comm               comm;
2002   PetscScalar            *values;
2003   DMBoundaryType         bx,by;
2004   DMDAStencilType        st;
2005   ISLocalToGlobalMapping ltog;
2006 
2007   PetscFunctionBegin;
2008   /*
2009      nc - number of components per grid point
2010      col - number of colors needed in one direction for single component problem
2011   */
2012   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
2013   col  = 2*s + 1;
2014 
2015   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
2016   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
2017   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
2018 
2019   ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr);
2020 
2021   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
2022 
2023   /* determine the matrix preallocation information */
2024   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
2025   for (i=xs; i<xs+nx; i++) {
2026     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2027     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2028     for (j=ys; j<ys+ny; j++) {
2029       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2030       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2031       slot   = i - gxs + gnx*(j - gys);
2032 
2033       /* Find block columns in block row */
2034       cnt = 0;
2035       for (ii=istart; ii<iend+1; ii++) {
2036         for (jj=jstart; jj<jend+1; jj++) {
2037           if (st == DMDA_STENCIL_BOX || !ii || !jj) {
2038             cols[cnt++] = slot + ii + gnx*jj;
2039           }
2040         }
2041       }
2042       ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2043       ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
2044     }
2045   }
2046   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
2047   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
2048   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2049 
2050   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
2051 
2052   /*
2053     For each node in the grid: we get the neighbors in the local (on processor ordering
2054     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2055     PETSc ordering.
2056   */
2057   if (!da->prealloc_only) {
2058     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
2059     for (i=xs; i<xs+nx; i++) {
2060       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2061       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2062       for (j=ys; j<ys+ny; j++) {
2063         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2064         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2065         slot   = i - gxs + gnx*(j - gys);
2066 
2067         /* Find block columns in block row */
2068         cnt = 0;
2069         for (ii=istart; ii<iend+1; ii++) {
2070           for (jj=jstart; jj<jend+1; jj++) {
2071             if (st == DMDA_STENCIL_BOX || !ii || !jj) {
2072               cols[cnt++] = slot + ii + gnx*jj;
2073             }
2074           }
2075         }
2076         ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2077         ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2078       }
2079     }
2080     ierr = PetscFree(values);CHKERRQ(ierr);
2081     /* do not copy values to GPU since they are all zero and not yet needed there */
2082     ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
2083     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2084     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2085     ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
2086     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2087   }
2088   ierr = PetscFree(cols);CHKERRQ(ierr);
2089   PetscFunctionReturn(0);
2090 }
2091 
2092 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J)
2093 {
2094   PetscErrorCode         ierr;
2095   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2096   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
2097   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
2098   MPI_Comm               comm;
2099   PetscScalar            *values;
2100   DMBoundaryType         bx,by,bz;
2101   DMDAStencilType        st;
2102   ISLocalToGlobalMapping ltog;
2103 
2104   PetscFunctionBegin;
2105   /*
2106      nc - number of components per grid point
2107      col - number of colors needed in one direction for single component problem
2108   */
2109   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
2110   col  = 2*s + 1;
2111 
2112   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
2113   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
2114   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
2115 
2116   /* create the matrix */
2117   ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr);
2118 
2119   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
2120 
2121   /* determine the matrix preallocation information */
2122   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
2123   for (i=xs; i<xs+nx; i++) {
2124     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2125     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2126     for (j=ys; j<ys+ny; j++) {
2127       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2128       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2129       for (k=zs; k<zs+nz; k++) {
2130         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2131         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2132 
2133         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2134 
2135         /* Find block columns in block row */
2136         cnt = 0;
2137         for (ii=istart; ii<iend+1; ii++) {
2138           for (jj=jstart; jj<jend+1; jj++) {
2139             for (kk=kstart; kk<kend+1; kk++) {
2140               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
2141                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
2142               }
2143             }
2144           }
2145         }
2146         ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2147         ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
2148       }
2149     }
2150   }
2151   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
2152   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
2153   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2154 
2155   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
2156 
2157   /*
2158     For each node in the grid: we get the neighbors in the local (on processor ordering
2159     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2160     PETSc ordering.
2161   */
2162   if (!da->prealloc_only) {
2163     ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr);
2164     for (i=xs; i<xs+nx; i++) {
2165       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2166       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2167       for (j=ys; j<ys+ny; j++) {
2168         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2169         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2170         for (k=zs; k<zs+nz; k++) {
2171           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2172           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2173 
2174           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2175 
2176           cnt = 0;
2177           for (ii=istart; ii<iend+1; ii++) {
2178             for (jj=jstart; jj<jend+1; jj++) {
2179               for (kk=kstart; kk<kend+1; kk++) {
2180                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
2181                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
2182                 }
2183               }
2184             }
2185           }
2186           ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2187           ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2188         }
2189       }
2190     }
2191     ierr = PetscFree(values);CHKERRQ(ierr);
2192     /* do not copy values to GPU since they are all zero and not yet needed there */
2193     ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
2194     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2195     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2196     ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
2197     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2198   }
2199   ierr = PetscFree(cols);CHKERRQ(ierr);
2200   PetscFunctionReturn(0);
2201 }
2202 
2203 /* ---------------------------------------------------------------------------------*/
2204 
2205 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J)
2206 {
2207   PetscErrorCode         ierr;
2208   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2209   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt, maxcnt = 0,l,p,*dnz,*onz;
2210   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
2211   DM_DA                  *dd = (DM_DA*)da->data;
2212   PetscInt               ifill_col,*dfill = dd->dfill,*ofill = dd->ofill;
2213   MPI_Comm               comm;
2214   PetscScalar            *values;
2215   DMBoundaryType         bx,by,bz;
2216   ISLocalToGlobalMapping ltog;
2217   DMDAStencilType        st;
2218   PetscBool              removedups = PETSC_FALSE;
2219 
2220   PetscFunctionBegin;
2221   /*
2222          nc - number of components per grid point
2223          col - number of colors needed in one direction for single component problem
2224 
2225   */
2226   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
2227   col  = 2*s + 1;
2228   if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
2229                  by 2*stencil_width + 1\n");
2230   if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
2231                  by 2*stencil_width + 1\n");
2232   if (bz == DM_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
2233                  by 2*stencil_width + 1\n");
2234 
2235   /*
2236        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
2237        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
2238   */
2239   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
2240   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
2241   if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;
2242 
2243   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
2244   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
2245   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
2246 
2247   ierr = PetscMalloc1(col*col*col*nc,&cols);CHKERRQ(ierr);
2248   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
2249 
2250   /* determine the matrix preallocation information */
2251   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
2252 
2253   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
2254   for (i=xs; i<xs+nx; i++) {
2255     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2256     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2257     for (j=ys; j<ys+ny; j++) {
2258       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2259       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2260       for (k=zs; k<zs+nz; k++) {
2261         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2262         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2263 
2264         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2265 
2266         for (l=0; l<nc; l++) {
2267           cnt = 0;
2268           for (ii=istart; ii<iend+1; ii++) {
2269             for (jj=jstart; jj<jend+1; jj++) {
2270               for (kk=kstart; kk<kend+1; kk++) {
2271                 if (ii || jj || kk) {
2272                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2273                     for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2274                   }
2275                 } else {
2276                   if (dfill) {
2277                     for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2278                   } else {
2279                     for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2280                   }
2281                 }
2282               }
2283             }
2284           }
2285           row  = l + nc*(slot);
2286           maxcnt = PetscMax(maxcnt,cnt);
2287           if (removedups) {
2288             ierr = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
2289           } else {
2290             ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
2291           }
2292         }
2293       }
2294     }
2295   }
2296   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
2297   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
2298   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2299   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
2300 
2301   /*
2302     For each node in the grid: we get the neighbors in the local (on processor ordering
2303     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2304     PETSc ordering.
2305   */
2306   if (!da->prealloc_only) {
2307     ierr = PetscCalloc1(maxcnt,&values);CHKERRQ(ierr);
2308     for (i=xs; i<xs+nx; i++) {
2309       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2310       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2311       for (j=ys; j<ys+ny; j++) {
2312         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2313         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2314         for (k=zs; k<zs+nz; k++) {
2315           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2316           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2317 
2318           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2319 
2320           for (l=0; l<nc; l++) {
2321             cnt = 0;
2322             for (ii=istart; ii<iend+1; ii++) {
2323               for (jj=jstart; jj<jend+1; jj++) {
2324                 for (kk=kstart; kk<kend+1; kk++) {
2325                   if (ii || jj || kk) {
2326                     if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2327                       for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2328                     }
2329                   } else {
2330                     if (dfill) {
2331                       for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2332                     } else {
2333                       for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2334                     }
2335                   }
2336                 }
2337               }
2338             }
2339             row  = l + nc*(slot);
2340             ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2341           }
2342         }
2343       }
2344     }
2345     ierr = PetscFree(values);CHKERRQ(ierr);
2346     /* do not copy values to GPU since they are all zero and not yet needed there */
2347     ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
2348     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2349     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2350     ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
2351     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2352   }
2353   ierr = PetscFree(cols);CHKERRQ(ierr);
2354   PetscFunctionReturn(0);
2355 }
2356