xref: /petsc/src/dm/impls/da/fdda.c (revision 794657ff9f251af38d02df99aa8144ade4d60b36)
1 
2 #include <petsc/private/dmdaimpl.h> /*I      "petscdmda.h"     I*/
3 #include <petscmat.h>
4 
5 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring*);
6 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring*);
7 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring*);
8 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring*);
9 
10 /*
11    For ghost i that may be negative or greater than the upper bound this
12   maps it into the 0:m-1 range using periodicity
13 */
14 #define SetInRange(i,m) ((i < 0) ? m+i : ((i >= m) ? i-m : i))
15 
16 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill,PetscInt w,PetscInt **rfill)
17 {
18   PetscErrorCode ierr;
19   PetscInt       i,j,nz,*fill;
20 
21   PetscFunctionBegin;
22   if (!dfill) PetscFunctionReturn(0);
23 
24   /* count number nonzeros */
25   nz = 0;
26   for (i=0; i<w; i++) {
27     for (j=0; j<w; j++) {
28       if (dfill[w*i+j]) nz++;
29     }
30   }
31   ierr = PetscMalloc1(nz + w + 1,&fill);CHKERRQ(ierr);
32   /* construct modified CSR storage of nonzero structure */
33   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
34    so fill[1] - fill[0] gives number of nonzeros in first row etc */
35   nz = w + 1;
36   for (i=0; i<w; i++) {
37     fill[i] = nz;
38     for (j=0; j<w; j++) {
39       if (dfill[w*i+j]) {
40         fill[nz] = j;
41         nz++;
42       }
43     }
44   }
45   fill[w] = nz;
46 
47   *rfill = fill;
48   PetscFunctionReturn(0);
49 }
50 
51 
52 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse,PetscInt w,PetscInt **rfill)
53 {
54   PetscErrorCode ierr;
55   PetscInt       i,nz,*fill;
56 
57   PetscFunctionBegin;
58   if (!dfillsparse) PetscFunctionReturn(0);
59 
60   /* Determine number of non-zeros */
61   nz = (dfillsparse[w] - w - 1);
62 
63   /* Allocate space for our copy of the given sparse matrix representation. */
64   ierr = PetscMalloc1(nz + w + 1,&fill);CHKERRQ(ierr);
65 
66   /* Copy the given sparse matrix representation. */
67   for(i = 0; i < (nz + w + 1); ++i)
68   {
69     fill[i] = dfillsparse[i];
70   }
71 
72   *rfill = fill;
73   PetscFunctionReturn(0);
74 }
75 
76 
77 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
78 {
79   PetscErrorCode ierr;
80   PetscInt       i,k,cnt = 1;
81 
82   PetscFunctionBegin;
83 
84   /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of
85    columns to the left with any nonzeros in them plus 1 */
86   ierr = PetscCalloc1(dd->w,&dd->ofillcols);CHKERRQ(ierr);
87   for (i=0; i<dd->w; i++) {
88     for (k=dd->ofill[i]; k<dd->ofill[i+1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
89   }
90   for (i=0; i<dd->w; i++) {
91     if (dd->ofillcols[i]) {
92       dd->ofillcols[i] = cnt++;
93     }
94   }
95   PetscFunctionReturn(0);
96 }
97 
98 
99 
100 /*@
101     DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
102     of the matrix returned by DMCreateMatrix().
103 
104     Logically Collective on DMDA
105 
106     Input Parameter:
107 +   da - the distributed array
108 .   dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
109 -   ofill - the fill pattern in the off-diagonal blocks
110 
111 
112     Level: developer
113 
114     Notes:
115     This only makes sense when you are doing multicomponent problems but using the
116        MPIAIJ matrix format
117 
118            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
119        representing coupling and 0 entries for missing coupling. For example
120 $             dfill[9] = {1, 0, 0,
121 $                         1, 1, 0,
122 $                         0, 1, 1}
123        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
124        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
125        diagonal block).
126 
127      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
128      can be represented in the dfill, ofill format
129 
130    Contributed by Glenn Hammond
131 
132 .seealso DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()
133 
134 @*/
135 PetscErrorCode  DMDASetBlockFills(DM da,const PetscInt *dfill,const PetscInt *ofill)
136 {
137   DM_DA          *dd = (DM_DA*)da->data;
138   PetscErrorCode ierr;
139 
140   PetscFunctionBegin;
141   /* save the given dfill and ofill information */
142   ierr = DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);CHKERRQ(ierr);
143   ierr = DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);CHKERRQ(ierr);
144 
145   /* count nonzeros in ofill columns */
146   ierr = DMDASetBlockFills_Private2(dd);CHKERRQ(ierr);
147 
148   PetscFunctionReturn(0);
149 }
150 
151 
152 /*@
153     DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
154     of the matrix returned by DMCreateMatrix(), using sparse representations
155     of fill patterns.
156 
157     Logically Collective on DMDA
158 
159     Input Parameter:
160 +   da - the distributed array
161 .   dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block)
162 -   ofill - the sparse fill pattern in the off-diagonal blocks
163 
164 
165     Level: developer
166 
167     Notes: This only makes sense when you are doing multicomponent problems but using the
168        MPIAIJ matrix format
169 
170            The format for dfill and ofill is a sparse representation of a
171            dof-by-dof matrix with 1 entries representing coupling and 0 entries
172            for missing coupling.  The sparse representation is a 1 dimensional
173            array of length nz + dof + 1, where nz is the number of non-zeros in
174            the matrix.  The first dof entries in the array give the
175            starting array indices of each row's items in the rest of the array,
176            the dof+1st item indicates the total number of nonzeros,
177            and the remaining nz items give the column indices of each of
178            the 1s within the logical 2D matrix.  Each row's items within
179            the array are the column indices of the 1s within that row
180            of the 2D matrix.  PETSc developers may recognize that this is the
181            same format as that computed by the DMDASetBlockFills_Private()
182            function from a dense 2D matrix representation.
183 
184      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
185      can be represented in the dfill, ofill format
186 
187    Contributed by Philip C. Roth
188 
189 .seealso DMDASetBlockFills(), DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()
190 
191 @*/
192 PetscErrorCode  DMDASetBlockFillsSparse(DM da,const PetscInt *dfillsparse,const PetscInt *ofillsparse)
193 {
194   DM_DA          *dd = (DM_DA*)da->data;
195   PetscErrorCode ierr;
196 
197   PetscFunctionBegin;
198   /* save the given dfill and ofill information */
199   ierr = DMDASetBlockFillsSparse_Private(dfillsparse,dd->w,&dd->dfill);CHKERRQ(ierr);
200   ierr = DMDASetBlockFillsSparse_Private(ofillsparse,dd->w,&dd->ofill);CHKERRQ(ierr);
201 
202   /* count nonzeros in ofill columns */
203   ierr = DMDASetBlockFills_Private2(dd);CHKERRQ(ierr);
204 
205   PetscFunctionReturn(0);
206 }
207 
208 
209 PetscErrorCode  DMCreateColoring_DA(DM da,ISColoringType ctype,ISColoring *coloring)
210 {
211   PetscErrorCode   ierr;
212   PetscInt         dim,m,n,p,nc;
213   DMBoundaryType bx,by,bz;
214   MPI_Comm         comm;
215   PetscMPIInt      size;
216   PetscBool        isBAIJ;
217   DM_DA            *dd = (DM_DA*)da->data;
218 
219   PetscFunctionBegin;
220   /*
221                                   m
222           ------------------------------------------------------
223          |                                                     |
224          |                                                     |
225          |               ----------------------                |
226          |               |                    |                |
227       n  |           yn  |                    |                |
228          |               |                    |                |
229          |               .---------------------                |
230          |             (xs,ys)     xn                          |
231          |            .                                        |
232          |         (gxs,gys)                                   |
233          |                                                     |
234           -----------------------------------------------------
235   */
236 
237   /*
238          nc - number of components per grid point
239          col - number of colors needed in one direction for single component problem
240 
241   */
242   ierr = DMDAGetInfo(da,&dim,0,0,0,&m,&n,&p,&nc,0,&bx,&by,&bz,0);CHKERRQ(ierr);
243 
244   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
245   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
246   if (ctype == IS_COLORING_LOCAL) {
247     if (size == 1) {
248       ctype = IS_COLORING_GLOBAL;
249     } else if (dim > 1) {
250       if ((m==1 && bx == DM_BOUNDARY_PERIODIC) || (n==1 && by == DM_BOUNDARY_PERIODIC) || (p==1 && bz == DM_BOUNDARY_PERIODIC)) {
251         SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain  on the same process");
252       }
253     }
254   }
255 
256   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
257      matrices is for the blocks, not the individual matrix elements  */
258   ierr = PetscStrcmp(da->mattype,MATBAIJ,&isBAIJ);CHKERRQ(ierr);
259   if (!isBAIJ) {ierr = PetscStrcmp(da->mattype,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);}
260   if (!isBAIJ) {ierr = PetscStrcmp(da->mattype,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);}
261   if (isBAIJ) {
262     dd->w  = 1;
263     dd->xs = dd->xs/nc;
264     dd->xe = dd->xe/nc;
265     dd->Xs = dd->Xs/nc;
266     dd->Xe = dd->Xe/nc;
267   }
268 
269   /*
270      We do not provide a getcoloring function in the DMDA operations because
271    the basic DMDA does not know about matrices. We think of DMDA as being more
272    more low-level then matrices.
273   */
274   if (dim == 1) {
275     ierr = DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
276   } else if (dim == 2) {
277     ierr =  DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
278   } else if (dim == 3) {
279     ierr =  DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
280   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
281   if (isBAIJ) {
282     dd->w  = nc;
283     dd->xs = dd->xs*nc;
284     dd->xe = dd->xe*nc;
285     dd->Xs = dd->Xs*nc;
286     dd->Xe = dd->Xe*nc;
287   }
288   PetscFunctionReturn(0);
289 }
290 
291 /* ---------------------------------------------------------------------------------*/
292 
293 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
294 {
295   PetscErrorCode   ierr;
296   PetscInt         xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
297   PetscInt         ncolors;
298   MPI_Comm         comm;
299   DMBoundaryType bx,by;
300   DMDAStencilType  st;
301   ISColoringValue  *colors;
302   DM_DA            *dd = (DM_DA*)da->data;
303 
304   PetscFunctionBegin;
305   /*
306          nc - number of components per grid point
307          col - number of colors needed in one direction for single component problem
308 
309   */
310   ierr = DMDAGetInfo(da,&dim,&m,&n,0,&M,&N,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
311   col  = 2*s + 1;
312   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
313   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
314   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
315 
316   /* special case as taught to us by Paul Hovland */
317   if (st == DMDA_STENCIL_STAR && s == 1) {
318     ierr = DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
319   } else {
320 
321     if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X (%d) is divisible\n\
322                                                             by 2*stencil_width + 1 (%d)\n", m, col);
323     if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y (%d) is divisible\n\
324                                                             by 2*stencil_width + 1 (%d)\n", n, col);
325     if (ctype == IS_COLORING_GLOBAL) {
326       if (!dd->localcoloring) {
327         ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr);
328         ii   = 0;
329         for (j=ys; j<ys+ny; j++) {
330           for (i=xs; i<xs+nx; i++) {
331             for (k=0; k<nc; k++) {
332               colors[ii++] = k + nc*((i % col) + col*(j % col));
333             }
334           }
335         }
336         ncolors = nc + nc*(col-1 + col*(col-1));
337         ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
338       }
339       *coloring = dd->localcoloring;
340     } else if (ctype == IS_COLORING_LOCAL) {
341       if (!dd->ghostedcoloring) {
342         ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr);
343         ii   = 0;
344         for (j=gys; j<gys+gny; j++) {
345           for (i=gxs; i<gxs+gnx; i++) {
346             for (k=0; k<nc; k++) {
347               /* the complicated stuff is to handle periodic boundaries */
348               colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
349             }
350           }
351         }
352         ncolors = nc + nc*(col - 1 + col*(col-1));
353         ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
354         /* PetscIntView(ncolors,(PetscInt*)colors,0); */
355 
356         ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
357       }
358       *coloring = dd->ghostedcoloring;
359     } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
360   }
361   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
362   PetscFunctionReturn(0);
363 }
364 
365 /* ---------------------------------------------------------------------------------*/
366 
367 PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
368 {
369   PetscErrorCode   ierr;
370   PetscInt         xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
371   PetscInt         ncolors;
372   MPI_Comm         comm;
373   DMBoundaryType bx,by,bz;
374   DMDAStencilType  st;
375   ISColoringValue  *colors;
376   DM_DA            *dd = (DM_DA*)da->data;
377 
378   PetscFunctionBegin;
379   /*
380          nc - number of components per grid point
381          col - number of colors needed in one direction for single component problem
382 
383   */
384   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
385   col  = 2*s + 1;
386   if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
387                                                          by 2*stencil_width + 1\n");
388   if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
389                                                          by 2*stencil_width + 1\n");
390   if (bz == DM_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
391                                                          by 2*stencil_width + 1\n");
392 
393   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
394   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
395   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
396 
397   /* create the coloring */
398   if (ctype == IS_COLORING_GLOBAL) {
399     if (!dd->localcoloring) {
400       ierr = PetscMalloc1(nc*nx*ny*nz,&colors);CHKERRQ(ierr);
401       ii   = 0;
402       for (k=zs; k<zs+nz; k++) {
403         for (j=ys; j<ys+ny; j++) {
404           for (i=xs; i<xs+nx; i++) {
405             for (l=0; l<nc; l++) {
406               colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
407             }
408           }
409         }
410       }
411       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
412       ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
413     }
414     *coloring = dd->localcoloring;
415   } else if (ctype == IS_COLORING_LOCAL) {
416     if (!dd->ghostedcoloring) {
417       ierr = PetscMalloc1(nc*gnx*gny*gnz,&colors);CHKERRQ(ierr);
418       ii   = 0;
419       for (k=gzs; k<gzs+gnz; k++) {
420         for (j=gys; j<gys+gny; j++) {
421           for (i=gxs; i<gxs+gnx; i++) {
422             for (l=0; l<nc; l++) {
423               /* the complicated stuff is to handle periodic boundaries */
424               colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
425             }
426           }
427         }
428       }
429       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
430       ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
431       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
432     }
433     *coloring = dd->ghostedcoloring;
434   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
435   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
436   PetscFunctionReturn(0);
437 }
438 
439 /* ---------------------------------------------------------------------------------*/
440 
441 PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
442 {
443   PetscErrorCode   ierr;
444   PetscInt         xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
445   PetscInt         ncolors;
446   MPI_Comm         comm;
447   DMBoundaryType bx;
448   ISColoringValue  *colors;
449   DM_DA            *dd = (DM_DA*)da->data;
450 
451   PetscFunctionBegin;
452   /*
453          nc - number of components per grid point
454          col - number of colors needed in one direction for single component problem
455 
456   */
457   ierr = DMDAGetInfo(da,&dim,&m,0,0,&M,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
458   col  = 2*s + 1;
459 
460   if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points %d is divisible\n\
461                                                           by 2*stencil_width + 1 %d\n",(int)m,(int)col);
462 
463   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
464   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
465   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
466 
467   /* create the coloring */
468   if (ctype == IS_COLORING_GLOBAL) {
469     if (!dd->localcoloring) {
470       ierr = PetscMalloc1(nc*nx,&colors);CHKERRQ(ierr);
471       if (dd->ofillcols) {
472         PetscInt tc = 0;
473         for (i=0; i<nc; i++) tc += (PetscInt) (dd->ofillcols[i] > 0);
474         i1 = 0;
475         for (i=xs; i<xs+nx; i++) {
476           for (l=0; l<nc; l++) {
477             if (dd->ofillcols[l] && (i % col)) {
478               colors[i1++] =  nc - 1 + tc*((i % col) - 1) + dd->ofillcols[l];
479             } else {
480               colors[i1++] = l;
481             }
482           }
483         }
484         ncolors = nc + 2*s*tc;
485       } else {
486         i1 = 0;
487         for (i=xs; i<xs+nx; i++) {
488           for (l=0; l<nc; l++) {
489             colors[i1++] = l + nc*(i % col);
490           }
491         }
492         ncolors = nc + nc*(col-1);
493       }
494       ierr = ISColoringCreate(comm,ncolors,nc*nx,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
495     }
496     *coloring = dd->localcoloring;
497   } else if (ctype == IS_COLORING_LOCAL) {
498     if (!dd->ghostedcoloring) {
499       ierr = PetscMalloc1(nc*gnx,&colors);CHKERRQ(ierr);
500       i1   = 0;
501       for (i=gxs; i<gxs+gnx; i++) {
502         for (l=0; l<nc; l++) {
503           /* the complicated stuff is to handle periodic boundaries */
504           colors[i1++] = l + nc*(SetInRange(i,m) % col);
505         }
506       }
507       ncolors = nc + nc*(col-1);
508       ierr    = ISColoringCreate(comm,ncolors,nc*gnx,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
509       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
510     }
511     *coloring = dd->ghostedcoloring;
512   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
513   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
514   PetscFunctionReturn(0);
515 }
516 
517 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
518 {
519   PetscErrorCode   ierr;
520   PetscInt         xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
521   PetscInt         ncolors;
522   MPI_Comm         comm;
523   DMBoundaryType bx,by;
524   ISColoringValue  *colors;
525   DM_DA            *dd = (DM_DA*)da->data;
526 
527   PetscFunctionBegin;
528   /*
529          nc - number of components per grid point
530          col - number of colors needed in one direction for single component problem
531 
532   */
533   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,0);CHKERRQ(ierr);
534   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
535   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
536   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
537 
538   if (bx == DM_BOUNDARY_PERIODIC && (m % 5)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible by 5\n");
539   if (by == DM_BOUNDARY_PERIODIC && (n % 5)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible by 5\n");
540 
541   /* create the coloring */
542   if (ctype == IS_COLORING_GLOBAL) {
543     if (!dd->localcoloring) {
544       ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr);
545       ii   = 0;
546       for (j=ys; j<ys+ny; j++) {
547         for (i=xs; i<xs+nx; i++) {
548           for (k=0; k<nc; k++) {
549             colors[ii++] = k + nc*((3*j+i) % 5);
550           }
551         }
552       }
553       ncolors = 5*nc;
554       ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
555     }
556     *coloring = dd->localcoloring;
557   } else if (ctype == IS_COLORING_LOCAL) {
558     if (!dd->ghostedcoloring) {
559       ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr);
560       ii = 0;
561       for (j=gys; j<gys+gny; j++) {
562         for (i=gxs; i<gxs+gnx; i++) {
563           for (k=0; k<nc; k++) {
564             colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
565           }
566         }
567       }
568       ncolors = 5*nc;
569       ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
570       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
571     }
572     *coloring = dd->ghostedcoloring;
573   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
574   PetscFunctionReturn(0);
575 }
576 
577 /* =========================================================================== */
578 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat);
579 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM,Mat);
580 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat);
581 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat);
582 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat);
583 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat);
584 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat);
585 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat);
586 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat);
587 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat);
588 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM,Mat);
589 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM,Mat);
590 extern PetscErrorCode DMCreateMatrix_DA_IS(DM,Mat);
591 
592 /*@C
593    MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix
594 
595    Logically Collective on Mat
596 
597    Input Parameters:
598 +  mat - the matrix
599 -  da - the da
600 
601    Level: intermediate
602 
603 @*/
604 PetscErrorCode MatSetupDM(Mat mat,DM da)
605 {
606   PetscErrorCode ierr;
607 
608   PetscFunctionBegin;
609   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
610   PetscValidHeaderSpecificType(da,DM_CLASSID,1,DMDA);
611   ierr = PetscTryMethod(mat,"MatSetupDM_C",(Mat,DM),(mat,da));CHKERRQ(ierr);
612   PetscFunctionReturn(0);
613 }
614 
615 PetscErrorCode  MatView_MPI_DA(Mat A,PetscViewer viewer)
616 {
617   DM                da;
618   PetscErrorCode    ierr;
619   const char        *prefix;
620   Mat               Anatural;
621   AO                ao;
622   PetscInt          rstart,rend,*petsc,i;
623   IS                is;
624   MPI_Comm          comm;
625   PetscViewerFormat format;
626 
627   PetscFunctionBegin;
628   /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
629   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
630   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0);
631 
632   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
633   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
634   if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
635 
636   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
637   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
638   ierr = PetscMalloc1(rend-rstart,&petsc);CHKERRQ(ierr);
639   for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
640   ierr = AOApplicationToPetsc(ao,rend-rstart,petsc);CHKERRQ(ierr);
641   ierr = ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
642 
643   /* call viewer on natural ordering */
644   ierr = MatCreateSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);CHKERRQ(ierr);
645   ierr = ISDestroy(&is);CHKERRQ(ierr);
646   ierr = PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);CHKERRQ(ierr);
647   ierr = PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);CHKERRQ(ierr);
648   ierr = PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);CHKERRQ(ierr);
649   ierr = MatView(Anatural,viewer);CHKERRQ(ierr);
650   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
651   PetscFunctionReturn(0);
652 }
653 
654 PetscErrorCode  MatLoad_MPI_DA(Mat A,PetscViewer viewer)
655 {
656   DM             da;
657   PetscErrorCode ierr;
658   Mat            Anatural,Aapp;
659   AO             ao;
660   PetscInt       rstart,rend,*app,i,m,n,M,N;
661   IS             is;
662   MPI_Comm       comm;
663 
664   PetscFunctionBegin;
665   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
666   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
667   if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
668 
669   /* Load the matrix in natural ordering */
670   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Anatural);CHKERRQ(ierr);
671   ierr = MatSetType(Anatural,((PetscObject)A)->type_name);CHKERRQ(ierr);
672   ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr);
673   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
674   ierr = MatSetSizes(Anatural,m,n,M,N);CHKERRQ(ierr);
675   ierr = MatLoad(Anatural,viewer);CHKERRQ(ierr);
676 
677   /* Map natural ordering to application ordering and create IS */
678   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
679   ierr = MatGetOwnershipRange(Anatural,&rstart,&rend);CHKERRQ(ierr);
680   ierr = PetscMalloc1(rend-rstart,&app);CHKERRQ(ierr);
681   for (i=rstart; i<rend; i++) app[i-rstart] = i;
682   ierr = AOPetscToApplication(ao,rend-rstart,app);CHKERRQ(ierr);
683   ierr = ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
684 
685   /* Do permutation and replace header */
686   ierr = MatCreateSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);CHKERRQ(ierr);
687   ierr = MatHeaderReplace(A,&Aapp);CHKERRQ(ierr);
688   ierr = ISDestroy(&is);CHKERRQ(ierr);
689   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
690   PetscFunctionReturn(0);
691 }
692 
693 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
694 {
695   PetscErrorCode ierr;
696   PetscInt       dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
697   Mat            A;
698   MPI_Comm       comm;
699   MatType        Atype;
700   PetscSection   section, sectionGlobal;
701   void           (*aij)(void)=NULL,(*baij)(void)=NULL,(*sbaij)(void)=NULL,(*sell)(void)=NULL,(*is)(void)=NULL;
702   MatType        mtype;
703   PetscMPIInt    size;
704   DM_DA          *dd = (DM_DA*)da->data;
705 
706   PetscFunctionBegin;
707   ierr = MatInitializePackage();CHKERRQ(ierr);
708   mtype = da->mattype;
709 
710   ierr = DMGetDefaultSection(da, &section);CHKERRQ(ierr);
711   if (section) {
712     PetscInt  bs = -1;
713     PetscInt  localSize;
714     PetscBool isShell, isBlock, isSeqBlock, isMPIBlock, isSymBlock, isSymSeqBlock, isSymMPIBlock, isSymmetric;
715 
716     ierr = DMGetDefaultGlobalSection(da, &sectionGlobal);CHKERRQ(ierr);
717     ierr = PetscSectionGetConstrainedStorageSize(sectionGlobal, &localSize);CHKERRQ(ierr);
718     ierr = MatCreate(PetscObjectComm((PetscObject)da),&A);CHKERRQ(ierr);
719     ierr = MatSetSizes(A,localSize,localSize,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
720     ierr = MatSetType(A,mtype);CHKERRQ(ierr);
721     ierr = PetscStrcmp(mtype,MATSHELL,&isShell);CHKERRQ(ierr);
722     ierr = PetscStrcmp(mtype,MATBAIJ,&isBlock);CHKERRQ(ierr);
723     ierr = PetscStrcmp(mtype,MATSEQBAIJ,&isSeqBlock);CHKERRQ(ierr);
724     ierr = PetscStrcmp(mtype,MATMPIBAIJ,&isMPIBlock);CHKERRQ(ierr);
725     ierr = PetscStrcmp(mtype,MATSBAIJ,&isSymBlock);CHKERRQ(ierr);
726     ierr = PetscStrcmp(mtype,MATSEQSBAIJ,&isSymSeqBlock);CHKERRQ(ierr);
727     ierr = PetscStrcmp(mtype,MATMPISBAIJ,&isSymMPIBlock);CHKERRQ(ierr);
728     /* Check for symmetric storage */
729     isSymmetric = (PetscBool) (isSymBlock || isSymSeqBlock || isSymMPIBlock);
730     if (isSymmetric) {
731       ierr = MatSetOption(*J, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE);CHKERRQ(ierr);
732     }
733     if (!isShell) {
734       PetscInt *dnz, *onz, *dnzu, *onzu, bsLocal;
735 
736       if (bs < 0) {
737         if (isBlock || isSeqBlock || isMPIBlock || isSymBlock || isSymSeqBlock || isSymMPIBlock) {
738           PetscInt pStart, pEnd, p, dof;
739 
740           ierr = PetscSectionGetChart(sectionGlobal, &pStart, &pEnd);CHKERRQ(ierr);
741           for (p = pStart; p < pEnd; ++p) {
742             ierr = PetscSectionGetDof(sectionGlobal, p, &dof);CHKERRQ(ierr);
743             if (dof) {
744               bs = dof;
745               break;
746             }
747           }
748         } else {
749           bs = 1;
750         }
751         /* Must have same blocksize on all procs (some might have no points) */
752         bsLocal = bs;
753         ierr    = MPIU_Allreduce(&bsLocal, &bs, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)da));CHKERRQ(ierr);
754       }
755       ierr = PetscCalloc4(localSize/bs, &dnz, localSize/bs, &onz, localSize/bs, &dnzu, localSize/bs, &onzu);CHKERRQ(ierr);
756       /* ierr = DMPlexPreallocateOperator(dm, bs, section, sectionGlobal, dnz, onz, dnzu, onzu, *J, fillMatrix);CHKERRQ(ierr); */
757       ierr = PetscFree4(dnz, onz, dnzu, onzu);CHKERRQ(ierr);
758     }
759   }
760   /*
761                                   m
762           ------------------------------------------------------
763          |                                                     |
764          |                                                     |
765          |               ----------------------                |
766          |               |                    |                |
767       n  |           ny  |                    |                |
768          |               |                    |                |
769          |               .---------------------                |
770          |             (xs,ys)     nx                          |
771          |            .                                        |
772          |         (gxs,gys)                                   |
773          |                                                     |
774           -----------------------------------------------------
775   */
776 
777   /*
778          nc - number of components per grid point
779          col - number of colors needed in one direction for single component problem
780 
781   */
782   M   = dd->M;
783   N   = dd->N;
784   P   = dd->P;
785   dim = da->dim;
786   dof = dd->w;
787   /* ierr = DMDAGetInfo(da,&dim,&M,&N,&P,0,0,0,&dof,0,0,0,0,0);CHKERRQ(ierr); */
788   ierr = DMDAGetCorners(da,0,0,0,&nx,&ny,&nz);CHKERRQ(ierr);
789   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
790   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
791   ierr = MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);CHKERRQ(ierr);
792   ierr = MatSetType(A,mtype);CHKERRQ(ierr);
793   ierr = MatSetDM(A,da);CHKERRQ(ierr);
794   if (da->structure_only) {
795     ierr = MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE);CHKERRQ(ierr);
796   }
797   ierr = MatGetType(A,&Atype);CHKERRQ(ierr);
798   /*
799      We do not provide a getmatrix function in the DMDA operations because
800    the basic DMDA does not know about matrices. We think of DMDA as being more
801    more low-level than matrices. This is kind of cheating but, cause sometimes
802    we think of DMDA has higher level than matrices.
803 
804      We could switch based on Atype (or mtype), but we do not since the
805    specialized setting routines depend only the particular preallocation
806    details of the matrix, not the type itself.
807   */
808   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
809   if (!aij) {
810     ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
811   }
812   if (!aij) {
813     ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
814     if (!baij) {
815       ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
816     }
817     if (!baij) {
818       ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
819       if (!sbaij) {
820         ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
821       }
822       if (!sbaij) {
823         ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISELLSetPreallocation_C",&sell);CHKERRQ(ierr);
824         if (!sell) {
825           ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",&sell);CHKERRQ(ierr);
826         }
827       }
828       if (!sell) {
829         ierr = PetscObjectQueryFunction((PetscObject)A,"MatISSetPreallocation_C",&is);CHKERRQ(ierr);
830       }
831     }
832   }
833   if (aij) {
834     if (dim == 1) {
835       if (dd->ofill) {
836         ierr = DMCreateMatrix_DA_1d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
837       } else {
838         ierr = DMCreateMatrix_DA_1d_MPIAIJ(da,A);CHKERRQ(ierr);
839       }
840     } else if (dim == 2) {
841       if (dd->ofill) {
842         ierr = DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
843       } else {
844         ierr = DMCreateMatrix_DA_2d_MPIAIJ(da,A);CHKERRQ(ierr);
845       }
846     } else if (dim == 3) {
847       if (dd->ofill) {
848         ierr = DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
849       } else {
850         ierr = DMCreateMatrix_DA_3d_MPIAIJ(da,A);CHKERRQ(ierr);
851       }
852     }
853   } else if (baij) {
854     if (dim == 2) {
855       ierr = DMCreateMatrix_DA_2d_MPIBAIJ(da,A);CHKERRQ(ierr);
856     } else if (dim == 3) {
857       ierr = DMCreateMatrix_DA_3d_MPIBAIJ(da,A);CHKERRQ(ierr);
858     } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
859   } else if (sbaij) {
860     if (dim == 2) {
861       ierr = DMCreateMatrix_DA_2d_MPISBAIJ(da,A);CHKERRQ(ierr);
862     } else if (dim == 3) {
863       ierr = DMCreateMatrix_DA_3d_MPISBAIJ(da,A);CHKERRQ(ierr);
864     } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
865   } else if (sell) {
866      if (dim == 2) {
867        ierr = DMCreateMatrix_DA_2d_MPISELL(da,A);CHKERRQ(ierr);
868      } else if (dim == 3) {
869        ierr = DMCreateMatrix_DA_3d_MPISELL(da,A);CHKERRQ(ierr);
870      } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
871   } else if (is) {
872     ierr = DMCreateMatrix_DA_IS(da,A);CHKERRQ(ierr);
873   } else {
874     ISLocalToGlobalMapping ltog;
875 
876     ierr = MatSetBlockSize(A,dof);CHKERRQ(ierr);
877     ierr = MatSetUp(A);CHKERRQ(ierr);
878     ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
879     ierr = MatSetLocalToGlobalMapping(A,ltog,ltog);CHKERRQ(ierr);
880   }
881   ierr = DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);CHKERRQ(ierr);
882   ierr = MatSetStencil(A,dim,dims,starts,dof);CHKERRQ(ierr);
883   ierr = MatSetDM(A,da);CHKERRQ(ierr);
884   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
885   if (size > 1) {
886     /* change viewer to display matrix in natural ordering */
887     ierr = MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA);CHKERRQ(ierr);
888     ierr = MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA);CHKERRQ(ierr);
889   }
890   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
891   *J = A;
892   PetscFunctionReturn(0);
893 }
894 
895 /* ---------------------------------------------------------------------------------*/
896 PetscErrorCode DMCreateMatrix_DA_IS(DM dm,Mat J)
897 {
898   DM_DA                  *da = (DM_DA*)dm->data;
899   Mat                    lJ;
900   ISLocalToGlobalMapping ltog;
901   IS                     is_loc_filt, is_glob;
902   const PetscInt         *e_loc,*idx;
903   PetscInt               i,nel,nen,dnz,nv,dof,dim,*gidx,nb;
904   PetscErrorCode         ierr;
905 
906   /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
907      We need to filter the local indices that are represented through the DMDAGetElements decomposition
908      This is because the size of the local matrices in MATIS is the local size of the l2g map */
909   PetscFunctionBegin;
910   dof  = da->w;
911   dim  = dm->dim;
912 
913   ierr = MatSetBlockSize(J,dof);CHKERRQ(ierr);
914 
915   /* get local elements indices in local DMDA numbering */
916   ierr = DMDAGetElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
917   ierr = ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nel*nen,e_loc,PETSC_COPY_VALUES,&is_loc_filt);CHKERRQ(ierr);
918   ierr = DMDARestoreElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr);
919 
920   /* obtain a consistent local ordering for MATIS */
921   ierr = ISSortRemoveDups(is_loc_filt);CHKERRQ(ierr);
922   ierr = ISBlockGetLocalSize(is_loc_filt,&nb);CHKERRQ(ierr);
923   ierr = DMGetLocalToGlobalMapping(dm,&ltog);CHKERRQ(ierr);
924   ierr = ISLocalToGlobalMappingGetSize(ltog,&nv);CHKERRQ(ierr);
925   ierr = PetscMalloc1(PetscMax(nb,nv/dof),&gidx);CHKERRQ(ierr);
926   ierr = ISBlockGetIndices(is_loc_filt,&idx);CHKERRQ(ierr);
927   ierr = ISLocalToGlobalMappingApplyBlock(ltog,nb,idx,gidx);CHKERRQ(ierr);
928   ierr = ISBlockRestoreIndices(is_loc_filt,&idx);CHKERRQ(ierr);
929   ierr = ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nb,gidx,PETSC_USE_POINTER,&is_glob);CHKERRQ(ierr);
930   ierr = ISLocalToGlobalMappingCreateIS(is_glob,&ltog);CHKERRQ(ierr);
931   ierr = ISDestroy(&is_glob);CHKERRQ(ierr);
932   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
933   ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
934 
935   /* We also attach a l2g map to the local matrices to have MatSetValueLocal to work */
936   ierr = MatISGetLocalMat(J,&lJ);CHKERRQ(ierr);
937   ierr = ISLocalToGlobalMappingCreateIS(is_loc_filt,&ltog);CHKERRQ(ierr);
938   ierr = ISDestroy(&is_loc_filt);CHKERRQ(ierr);
939   ierr = ISCreateStride(PetscObjectComm((PetscObject)lJ),nv/dof,0,1,&is_glob);CHKERRQ(ierr);
940   ierr = ISGetIndices(is_glob,&idx);CHKERRQ(ierr);
941   ierr = ISGlobalToLocalMappingApplyBlock(ltog,IS_GTOLM_MASK,nv/dof,idx,&nb,gidx);CHKERRQ(ierr);
942   ierr = ISRestoreIndices(is_glob,&idx);CHKERRQ(ierr);
943   ierr = ISDestroy(&is_glob);CHKERRQ(ierr);
944   ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
945   ierr = ISCreateBlock(PETSC_COMM_SELF,dof,nb,gidx,PETSC_USE_POINTER,&is_loc_filt);CHKERRQ(ierr);
946   ierr = ISLocalToGlobalMappingCreateIS(is_loc_filt,&ltog);CHKERRQ(ierr);
947   ierr = ISDestroy(&is_loc_filt);CHKERRQ(ierr);
948   ierr = MatSetLocalToGlobalMapping(lJ,ltog,ltog);CHKERRQ(ierr);
949   ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
950   ierr = PetscFree(gidx);CHKERRQ(ierr);
951 
952   /* Preallocation (not exact) */
953   switch (da->elementtype) {
954   case DMDA_ELEMENT_P1:
955   case DMDA_ELEMENT_Q1:
956     dnz = 1;
957     for (i=0; i<dim; i++) dnz *= 3;
958     dnz *= dof;
959     break;
960   default:
961     SETERRQ1(PetscObjectComm((PetscObject)dm),PETSC_ERR_SUP,"Unhandled element type %d",da->elementtype);
962     break;
963   }
964   ierr = MatSeqAIJSetPreallocation(lJ,dnz,NULL);CHKERRQ(ierr);
965   ierr = MatSeqBAIJSetPreallocation(lJ,dof,dnz/dof,NULL);CHKERRQ(ierr);
966   ierr = MatSeqSBAIJSetPreallocation(lJ,dof,dnz/dof,NULL);CHKERRQ(ierr);
967   ierr = MatISRestoreLocalMat(J,&lJ);CHKERRQ(ierr);
968   PetscFunctionReturn(0);
969 }
970 
971 PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da,Mat J)
972 {
973   PetscErrorCode         ierr;
974   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p;
975   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
976   MPI_Comm               comm;
977   PetscScalar            *values;
978   DMBoundaryType         bx,by;
979   ISLocalToGlobalMapping ltog;
980   DMDAStencilType        st;
981 
982   PetscFunctionBegin;
983   /*
984          nc - number of components per grid point
985          col - number of colors needed in one direction for single component problem
986 
987   */
988   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
989   col  = 2*s + 1;
990   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
991   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
992   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
993 
994   ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr);
995   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
996 
997   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
998   /* determine the matrix preallocation information */
999   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
1000   for (i=xs; i<xs+nx; i++) {
1001 
1002     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1003     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1004 
1005     for (j=ys; j<ys+ny; j++) {
1006       slot = i - gxs + gnx*(j - gys);
1007 
1008       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1009       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1010 
1011       cnt = 0;
1012       for (k=0; k<nc; k++) {
1013         for (l=lstart; l<lend+1; l++) {
1014           for (p=pstart; p<pend+1; p++) {
1015             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1016               cols[cnt++] = k + nc*(slot + gnx*l + p);
1017             }
1018           }
1019         }
1020         rows[k] = k + nc*(slot);
1021       }
1022       ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1023     }
1024   }
1025   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1026   ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1027   ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1028   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1029 
1030   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1031 
1032   /*
1033     For each node in the grid: we get the neighbors in the local (on processor ordering
1034     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1035     PETSc ordering.
1036   */
1037   if (!da->prealloc_only) {
1038     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
1039     for (i=xs; i<xs+nx; i++) {
1040 
1041       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1042       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1043 
1044       for (j=ys; j<ys+ny; j++) {
1045         slot = i - gxs + gnx*(j - gys);
1046 
1047         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1048         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1049 
1050         cnt = 0;
1051         for (k=0; k<nc; k++) {
1052           for (l=lstart; l<lend+1; l++) {
1053             for (p=pstart; p<pend+1; p++) {
1054               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1055                 cols[cnt++] = k + nc*(slot + gnx*l + p);
1056               }
1057             }
1058           }
1059           rows[k] = k + nc*(slot);
1060         }
1061         ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1062       }
1063     }
1064     ierr = PetscFree(values);CHKERRQ(ierr);
1065     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1066     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1067     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1068   }
1069   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1070   PetscFunctionReturn(0);
1071 }
1072 
1073 PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da,Mat J)
1074 {
1075   PetscErrorCode         ierr;
1076   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1077   PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
1078   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
1079   MPI_Comm               comm;
1080   PetscScalar            *values;
1081   DMBoundaryType         bx,by,bz;
1082   ISLocalToGlobalMapping ltog;
1083   DMDAStencilType        st;
1084 
1085   PetscFunctionBegin;
1086   /*
1087          nc - number of components per grid point
1088          col - number of colors needed in one direction for single component problem
1089 
1090   */
1091   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1092   col  = 2*s + 1;
1093   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1094   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1095   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1096 
1097   ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr);
1098   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1099 
1100   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1101   /* determine the matrix preallocation information */
1102   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1103   for (i=xs; i<xs+nx; i++) {
1104     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1105     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1106     for (j=ys; j<ys+ny; j++) {
1107       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1108       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1109       for (k=zs; k<zs+nz; k++) {
1110         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1111         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1112 
1113         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1114 
1115         cnt = 0;
1116         for (l=0; l<nc; l++) {
1117           for (ii=istart; ii<iend+1; ii++) {
1118             for (jj=jstart; jj<jend+1; jj++) {
1119               for (kk=kstart; kk<kend+1; kk++) {
1120                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1121                   cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1122                 }
1123               }
1124             }
1125           }
1126           rows[l] = l + nc*(slot);
1127         }
1128         ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1129       }
1130     }
1131   }
1132   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1133   ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1134   ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1135   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1136   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1137 
1138   /*
1139     For each node in the grid: we get the neighbors in the local (on processor ordering
1140     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1141     PETSc ordering.
1142   */
1143   if (!da->prealloc_only) {
1144     ierr = PetscCalloc1(col*col*col*nc*nc*nc,&values);CHKERRQ(ierr);
1145     for (i=xs; i<xs+nx; i++) {
1146       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1147       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1148       for (j=ys; j<ys+ny; j++) {
1149         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1150         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1151         for (k=zs; k<zs+nz; k++) {
1152           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1153           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1154 
1155           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1156 
1157           cnt = 0;
1158           for (l=0; l<nc; l++) {
1159             for (ii=istart; ii<iend+1; ii++) {
1160               for (jj=jstart; jj<jend+1; jj++) {
1161                 for (kk=kstart; kk<kend+1; kk++) {
1162                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1163                     cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1164                   }
1165                 }
1166               }
1167             }
1168             rows[l] = l + nc*(slot);
1169           }
1170           ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1171         }
1172       }
1173     }
1174     ierr = PetscFree(values);CHKERRQ(ierr);
1175     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1176     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1177     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1178   }
1179   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1180   PetscFunctionReturn(0);
1181 }
1182 
1183 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J)
1184 {
1185   PetscErrorCode         ierr;
1186   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,M,N;
1187   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
1188   MPI_Comm               comm;
1189   PetscScalar            *values;
1190   DMBoundaryType         bx,by;
1191   ISLocalToGlobalMapping ltog;
1192   DMDAStencilType        st;
1193   PetscBool              removedups = PETSC_FALSE;
1194 
1195   PetscFunctionBegin;
1196   /*
1197          nc - number of components per grid point
1198          col - number of colors needed in one direction for single component problem
1199 
1200   */
1201   ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1202   col  = 2*s + 1;
1203   /*
1204        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1205        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1206   */
1207   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1208   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1209   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1210   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1211   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1212 
1213   ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr);
1214   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1215 
1216   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1217   /* determine the matrix preallocation information */
1218   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
1219   for (i=xs; i<xs+nx; i++) {
1220 
1221     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1222     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1223 
1224     for (j=ys; j<ys+ny; j++) {
1225       slot = i - gxs + gnx*(j - gys);
1226 
1227       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1228       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1229 
1230       cnt = 0;
1231       for (k=0; k<nc; k++) {
1232         for (l=lstart; l<lend+1; l++) {
1233           for (p=pstart; p<pend+1; p++) {
1234             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1235               cols[cnt++] = k + nc*(slot + gnx*l + p);
1236             }
1237           }
1238         }
1239         rows[k] = k + nc*(slot);
1240       }
1241       if (removedups) {
1242         ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1243       } else {
1244         ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1245       }
1246     }
1247   }
1248   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1249   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1250   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1251   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1252 
1253   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1254 
1255   /*
1256     For each node in the grid: we get the neighbors in the local (on processor ordering
1257     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1258     PETSc ordering.
1259   */
1260   if (!da->prealloc_only) {
1261     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
1262     for (i=xs; i<xs+nx; i++) {
1263 
1264       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1265       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1266 
1267       for (j=ys; j<ys+ny; j++) {
1268         slot = i - gxs + gnx*(j - gys);
1269 
1270         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1271         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1272 
1273         cnt = 0;
1274         for (k=0; k<nc; k++) {
1275           for (l=lstart; l<lend+1; l++) {
1276             for (p=pstart; p<pend+1; p++) {
1277               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1278                 cols[cnt++] = k + nc*(slot + gnx*l + p);
1279               }
1280             }
1281           }
1282           rows[k] = k + nc*(slot);
1283         }
1284         ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1285       }
1286     }
1287     ierr = PetscFree(values);CHKERRQ(ierr);
1288     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1289     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1290     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1291   }
1292   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1293   PetscFunctionReturn(0);
1294 }
1295 
1296 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)
1297 {
1298   PetscErrorCode         ierr;
1299   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1300   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,maxcnt = 0,l,p,M,N;
1301   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
1302   DM_DA                  *dd = (DM_DA*)da->data;
1303   PetscInt               ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
1304   MPI_Comm               comm;
1305   PetscScalar            *values;
1306   DMBoundaryType         bx,by;
1307   ISLocalToGlobalMapping ltog;
1308   DMDAStencilType        st;
1309   PetscBool              removedups = PETSC_FALSE;
1310 
1311   PetscFunctionBegin;
1312   /*
1313          nc - number of components per grid point
1314          col - number of colors needed in one direction for single component problem
1315 
1316   */
1317   ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1318   col  = 2*s + 1;
1319   /*
1320        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1321        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1322   */
1323   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1324   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1325   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1326   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1327   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1328 
1329   ierr = PetscMalloc1(col*col*nc,&cols);CHKERRQ(ierr);
1330   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1331 
1332   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1333   /* determine the matrix preallocation information */
1334   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
1335   for (i=xs; i<xs+nx; i++) {
1336 
1337     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1338     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1339 
1340     for (j=ys; j<ys+ny; j++) {
1341       slot = i - gxs + gnx*(j - gys);
1342 
1343       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1344       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1345 
1346       for (k=0; k<nc; k++) {
1347         cnt = 0;
1348         for (l=lstart; l<lend+1; l++) {
1349           for (p=pstart; p<pend+1; p++) {
1350             if (l || p) {
1351               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
1352                 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
1353               }
1354             } else {
1355               if (dfill) {
1356                 for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1357               } else {
1358                 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1359               }
1360             }
1361           }
1362         }
1363         row    = k + nc*(slot);
1364         maxcnt = PetscMax(maxcnt,cnt);
1365         if (removedups) {
1366           ierr   = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1367         } else {
1368           ierr   = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1369         }
1370       }
1371     }
1372   }
1373   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1374   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1375   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1376   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1377 
1378   /*
1379     For each node in the grid: we get the neighbors in the local (on processor ordering
1380     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1381     PETSc ordering.
1382   */
1383   if (!da->prealloc_only) {
1384     ierr = PetscCalloc1(maxcnt,&values);CHKERRQ(ierr);
1385     for (i=xs; i<xs+nx; i++) {
1386 
1387       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1388       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1389 
1390       for (j=ys; j<ys+ny; j++) {
1391         slot = i - gxs + gnx*(j - gys);
1392 
1393         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1394         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1395 
1396         for (k=0; k<nc; k++) {
1397           cnt = 0;
1398           for (l=lstart; l<lend+1; l++) {
1399             for (p=pstart; p<pend+1; p++) {
1400               if (l || p) {
1401                 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
1402                   for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
1403                 }
1404               } else {
1405                 if (dfill) {
1406                   for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1407                 } else {
1408                   for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1409                 }
1410               }
1411             }
1412           }
1413           row  = k + nc*(slot);
1414           ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1415         }
1416       }
1417     }
1418     ierr = PetscFree(values);CHKERRQ(ierr);
1419     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1420     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1421     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1422   }
1423   ierr = PetscFree(cols);CHKERRQ(ierr);
1424   PetscFunctionReturn(0);
1425 }
1426 
1427 /* ---------------------------------------------------------------------------------*/
1428 
1429 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J)
1430 {
1431   PetscErrorCode         ierr;
1432   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1433   PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
1434   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
1435   MPI_Comm               comm;
1436   PetscScalar            *values;
1437   DMBoundaryType         bx,by,bz;
1438   ISLocalToGlobalMapping ltog;
1439   DMDAStencilType        st;
1440   PetscBool              removedups = PETSC_FALSE;
1441 
1442   PetscFunctionBegin;
1443   /*
1444          nc - number of components per grid point
1445          col - number of colors needed in one direction for single component problem
1446 
1447   */
1448   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1449   col  = 2*s + 1;
1450 
1451   /*
1452        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1453        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1454   */
1455   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1456   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1457   if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;
1458 
1459   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1460   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1461   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1462 
1463   ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr);
1464   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1465 
1466   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1467   /* determine the matrix preallocation information */
1468   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1469   for (i=xs; i<xs+nx; i++) {
1470     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1471     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1472     for (j=ys; j<ys+ny; j++) {
1473       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1474       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1475       for (k=zs; k<zs+nz; k++) {
1476         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1477         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1478 
1479         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1480 
1481         cnt = 0;
1482         for (l=0; l<nc; l++) {
1483           for (ii=istart; ii<iend+1; ii++) {
1484             for (jj=jstart; jj<jend+1; jj++) {
1485               for (kk=kstart; kk<kend+1; kk++) {
1486                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1487                   cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1488                 }
1489               }
1490             }
1491           }
1492           rows[l] = l + nc*(slot);
1493         }
1494         if (removedups) {
1495           ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1496         } else {
1497           ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1498         }
1499       }
1500     }
1501   }
1502   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1503   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1504   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1505   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1506   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1507 
1508   /*
1509     For each node in the grid: we get the neighbors in the local (on processor ordering
1510     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1511     PETSc ordering.
1512   */
1513   if (!da->prealloc_only) {
1514     ierr = PetscCalloc1(col*col*col*nc*nc*nc,&values);CHKERRQ(ierr);
1515     for (i=xs; i<xs+nx; i++) {
1516       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1517       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1518       for (j=ys; j<ys+ny; j++) {
1519         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1520         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1521         for (k=zs; k<zs+nz; k++) {
1522           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1523           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1524 
1525           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1526 
1527           cnt = 0;
1528           for (l=0; l<nc; l++) {
1529             for (ii=istart; ii<iend+1; ii++) {
1530               for (jj=jstart; jj<jend+1; jj++) {
1531                 for (kk=kstart; kk<kend+1; kk++) {
1532                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1533                     cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1534                   }
1535                 }
1536               }
1537             }
1538             rows[l] = l + nc*(slot);
1539           }
1540           ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1541         }
1542       }
1543     }
1544     ierr = PetscFree(values);CHKERRQ(ierr);
1545     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1546     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1547     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1548   }
1549   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1550   PetscFunctionReturn(0);
1551 }
1552 
1553 /* ---------------------------------------------------------------------------------*/
1554 
1555 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J)
1556 {
1557   PetscErrorCode         ierr;
1558   DM_DA                  *dd = (DM_DA*)da->data;
1559   PetscInt               xs,nx,i,j,gxs,gnx,row,k,l;
1560   PetscInt               m,dim,s,*cols = NULL,nc,cnt,maxcnt = 0,*ocols;
1561   PetscInt               *ofill = dd->ofill,*dfill = dd->dfill;
1562   PetscScalar            *values;
1563   DMBoundaryType         bx;
1564   ISLocalToGlobalMapping ltog;
1565   PetscMPIInt            rank,size;
1566 
1567   PetscFunctionBegin;
1568   if (dd->bx == DM_BOUNDARY_PERIODIC) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"With fill provided not implemented with periodic boundary conditions");
1569   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)da),&rank);CHKERRQ(ierr);
1570   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRQ(ierr);
1571 
1572   /*
1573          nc - number of components per grid point
1574 
1575   */
1576   ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
1577   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
1578   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
1579 
1580   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1581   ierr = PetscCalloc2(nx*nc,&cols,nx*nc,&ocols);CHKERRQ(ierr);
1582 
1583   /*
1584         note should be smaller for first and last process with no periodic
1585         does not handle dfill
1586   */
1587   cnt = 0;
1588   /* coupling with process to the left */
1589   for (i=0; i<s; i++) {
1590     for (j=0; j<nc; j++) {
1591       ocols[cnt] = ((!rank) ? 0 : (s - i)*(ofill[j+1] - ofill[j]));
1592       cols[cnt]  = dfill[j+1] - dfill[j] + (s + i)*(ofill[j+1] - ofill[j]);
1593       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1594       cnt++;
1595     }
1596   }
1597   for (i=s; i<nx-s; i++) {
1598     for (j=0; j<nc; j++) {
1599       cols[cnt] = dfill[j+1] - dfill[j] + 2*s*(ofill[j+1] - ofill[j]);
1600       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1601       cnt++;
1602     }
1603   }
1604   /* coupling with process to the right */
1605   for (i=nx-s; i<nx; i++) {
1606     for (j=0; j<nc; j++) {
1607       ocols[cnt] = ((rank == (size-1)) ? 0 : (i - nx + s + 1)*(ofill[j+1] - ofill[j]));
1608       cols[cnt]  = dfill[j+1] - dfill[j] + (s + nx - i - 1)*(ofill[j+1] - ofill[j]);
1609       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1610       cnt++;
1611     }
1612   }
1613 
1614   ierr = MatSeqAIJSetPreallocation(J,0,cols);CHKERRQ(ierr);
1615   ierr = MatMPIAIJSetPreallocation(J,0,cols,0,ocols);CHKERRQ(ierr);
1616   ierr = PetscFree2(cols,ocols);CHKERRQ(ierr);
1617 
1618   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1619   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1620 
1621   /*
1622     For each node in the grid: we get the neighbors in the local (on processor ordering
1623     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1624     PETSc ordering.
1625   */
1626   if (!da->prealloc_only) {
1627     ierr = PetscCalloc2(maxcnt,&values,maxcnt,&cols);CHKERRQ(ierr);
1628 
1629     row = xs*nc;
1630     /* coupling with process to the left */
1631     for (i=xs; i<xs+s; i++) {
1632       for (j=0; j<nc; j++) {
1633         cnt = 0;
1634         if (rank) {
1635           for (l=0; l<s; l++) {
1636             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1637           }
1638         }
1639         if (dfill) {
1640           for (k=dfill[j]; k<dfill[j+1]; k++) {
1641             cols[cnt++] = i*nc + dfill[k];
1642           }
1643         } else {
1644           for (k=0; k<nc; k++) {
1645             cols[cnt++] = i*nc + k;
1646           }
1647         }
1648         for (l=0; l<s; l++) {
1649           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1650         }
1651         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1652         row++;
1653       }
1654     }
1655     for (i=xs+s; i<xs+nx-s; i++) {
1656       for (j=0; j<nc; j++) {
1657         cnt = 0;
1658         for (l=0; l<s; l++) {
1659           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1660         }
1661         if (dfill) {
1662           for (k=dfill[j]; k<dfill[j+1]; k++) {
1663             cols[cnt++] = i*nc + dfill[k];
1664           }
1665         } else {
1666           for (k=0; k<nc; k++) {
1667             cols[cnt++] = i*nc + k;
1668           }
1669         }
1670         for (l=0; l<s; l++) {
1671           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1672         }
1673         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1674         row++;
1675       }
1676     }
1677     /* coupling with process to the right */
1678     for (i=xs+nx-s; i<xs+nx; i++) {
1679       for (j=0; j<nc; j++) {
1680         cnt = 0;
1681         for (l=0; l<s; l++) {
1682           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1683         }
1684         if (dfill) {
1685           for (k=dfill[j]; k<dfill[j+1]; k++) {
1686             cols[cnt++] = i*nc + dfill[k];
1687           }
1688         } else {
1689           for (k=0; k<nc; k++) {
1690             cols[cnt++] = i*nc + k;
1691           }
1692         }
1693         if (rank < size-1) {
1694           for (l=0; l<s; l++) {
1695             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1696           }
1697         }
1698         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1699         row++;
1700       }
1701     }
1702     ierr = PetscFree2(values,cols);CHKERRQ(ierr);
1703     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1704     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1705     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1706   }
1707   PetscFunctionReturn(0);
1708 }
1709 
1710 /* ---------------------------------------------------------------------------------*/
1711 
1712 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J)
1713 {
1714   PetscErrorCode         ierr;
1715   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1716   PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
1717   PetscInt               istart,iend;
1718   PetscScalar            *values;
1719   DMBoundaryType         bx;
1720   ISLocalToGlobalMapping ltog;
1721 
1722   PetscFunctionBegin;
1723   /*
1724          nc - number of components per grid point
1725          col - number of colors needed in one direction for single component problem
1726 
1727   */
1728   ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
1729   col  = 2*s + 1;
1730 
1731   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
1732   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
1733 
1734   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1735   ierr = MatSeqAIJSetPreallocation(J,col*nc,0);CHKERRQ(ierr);
1736   ierr = MatMPIAIJSetPreallocation(J,col*nc,0,col*nc,0);CHKERRQ(ierr);
1737 
1738   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1739   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1740 
1741   /*
1742     For each node in the grid: we get the neighbors in the local (on processor ordering
1743     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1744     PETSc ordering.
1745   */
1746   if (!da->prealloc_only) {
1747     ierr = PetscMalloc2(nc,&rows,col*nc*nc,&cols);CHKERRQ(ierr);
1748     ierr = PetscCalloc1(col*nc*nc,&values);CHKERRQ(ierr);
1749     for (i=xs; i<xs+nx; i++) {
1750       istart = PetscMax(-s,gxs - i);
1751       iend   = PetscMin(s,gxs + gnx - i - 1);
1752       slot   = i - gxs;
1753 
1754       cnt = 0;
1755       for (l=0; l<nc; l++) {
1756         for (i1=istart; i1<iend+1; i1++) {
1757           cols[cnt++] = l + nc*(slot + i1);
1758         }
1759         rows[l] = l + nc*(slot);
1760       }
1761       ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1762     }
1763     ierr = PetscFree(values);CHKERRQ(ierr);
1764     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1765     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1766     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1767     ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1768   }
1769   PetscFunctionReturn(0);
1770 }
1771 
1772 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
1773 {
1774   PetscErrorCode         ierr;
1775   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1776   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1777   PetscInt               istart,iend,jstart,jend,ii,jj;
1778   MPI_Comm               comm;
1779   PetscScalar            *values;
1780   DMBoundaryType         bx,by;
1781   DMDAStencilType        st;
1782   ISLocalToGlobalMapping ltog;
1783 
1784   PetscFunctionBegin;
1785   /*
1786      nc - number of components per grid point
1787      col - number of colors needed in one direction for single component problem
1788   */
1789   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1790   col  = 2*s + 1;
1791 
1792   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1793   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1794   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1795 
1796   ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr);
1797 
1798   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1799 
1800   /* determine the matrix preallocation information */
1801   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1802   for (i=xs; i<xs+nx; i++) {
1803     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1804     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1805     for (j=ys; j<ys+ny; j++) {
1806       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1807       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1808       slot   = i - gxs + gnx*(j - gys);
1809 
1810       /* Find block columns in block row */
1811       cnt = 0;
1812       for (ii=istart; ii<iend+1; ii++) {
1813         for (jj=jstart; jj<jend+1; jj++) {
1814           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1815             cols[cnt++] = slot + ii + gnx*jj;
1816           }
1817         }
1818       }
1819       ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1820     }
1821   }
1822   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1823   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1824   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1825 
1826   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1827 
1828   /*
1829     For each node in the grid: we get the neighbors in the local (on processor ordering
1830     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1831     PETSc ordering.
1832   */
1833   if (!da->prealloc_only) {
1834     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
1835     for (i=xs; i<xs+nx; i++) {
1836       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1837       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1838       for (j=ys; j<ys+ny; j++) {
1839         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1840         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1841         slot = i - gxs + gnx*(j - gys);
1842         cnt  = 0;
1843         for (ii=istart; ii<iend+1; ii++) {
1844           for (jj=jstart; jj<jend+1; jj++) {
1845             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1846               cols[cnt++] = slot + ii + gnx*jj;
1847             }
1848           }
1849         }
1850         ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1851       }
1852     }
1853     ierr = PetscFree(values);CHKERRQ(ierr);
1854     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1855     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1856     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1857   }
1858   ierr = PetscFree(cols);CHKERRQ(ierr);
1859   PetscFunctionReturn(0);
1860 }
1861 
1862 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
1863 {
1864   PetscErrorCode         ierr;
1865   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1866   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1867   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1868   MPI_Comm               comm;
1869   PetscScalar            *values;
1870   DMBoundaryType         bx,by,bz;
1871   DMDAStencilType        st;
1872   ISLocalToGlobalMapping ltog;
1873 
1874   PetscFunctionBegin;
1875   /*
1876          nc - number of components per grid point
1877          col - number of colors needed in one direction for single component problem
1878 
1879   */
1880   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1881   col  = 2*s + 1;
1882 
1883   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1884   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1885   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1886 
1887   ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr);
1888 
1889   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1890 
1891   /* determine the matrix preallocation information */
1892   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1893   for (i=xs; i<xs+nx; i++) {
1894     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1895     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1896     for (j=ys; j<ys+ny; j++) {
1897       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1898       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1899       for (k=zs; k<zs+nz; k++) {
1900         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1901         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1902 
1903         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1904 
1905         /* Find block columns in block row */
1906         cnt = 0;
1907         for (ii=istart; ii<iend+1; ii++) {
1908           for (jj=jstart; jj<jend+1; jj++) {
1909             for (kk=kstart; kk<kend+1; kk++) {
1910               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1911                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1912               }
1913             }
1914           }
1915         }
1916         ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1917       }
1918     }
1919   }
1920   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1921   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1922   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1923 
1924   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1925 
1926   /*
1927     For each node in the grid: we get the neighbors in the local (on processor ordering
1928     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1929     PETSc ordering.
1930   */
1931   if (!da->prealloc_only) {
1932     ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr);
1933     for (i=xs; i<xs+nx; i++) {
1934       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1935       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1936       for (j=ys; j<ys+ny; j++) {
1937         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1938         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1939         for (k=zs; k<zs+nz; k++) {
1940           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1941           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1942 
1943           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1944 
1945           cnt = 0;
1946           for (ii=istart; ii<iend+1; ii++) {
1947             for (jj=jstart; jj<jend+1; jj++) {
1948               for (kk=kstart; kk<kend+1; kk++) {
1949                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1950                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1951                 }
1952               }
1953             }
1954           }
1955           ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1956         }
1957       }
1958     }
1959     ierr = PetscFree(values);CHKERRQ(ierr);
1960     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1961     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1962     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1963   }
1964   ierr = PetscFree(cols);CHKERRQ(ierr);
1965   PetscFunctionReturn(0);
1966 }
1967 
1968 /*
1969   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
1970   identify in the local ordering with periodic domain.
1971 */
1972 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[])
1973 {
1974   PetscErrorCode ierr;
1975   PetscInt       i,n;
1976 
1977   PetscFunctionBegin;
1978   ierr = ISLocalToGlobalMappingApplyBlock(ltog,1,row,row);CHKERRQ(ierr);
1979   ierr = ISLocalToGlobalMappingApplyBlock(ltog,*cnt,col,col);CHKERRQ(ierr);
1980   for (i=0,n=0; i<*cnt; i++) {
1981     if (col[i] >= *row) col[n++] = col[i];
1982   }
1983   *cnt = n;
1984   PetscFunctionReturn(0);
1985 }
1986 
1987 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J)
1988 {
1989   PetscErrorCode         ierr;
1990   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1991   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1992   PetscInt               istart,iend,jstart,jend,ii,jj;
1993   MPI_Comm               comm;
1994   PetscScalar            *values;
1995   DMBoundaryType         bx,by;
1996   DMDAStencilType        st;
1997   ISLocalToGlobalMapping ltog;
1998 
1999   PetscFunctionBegin;
2000   /*
2001      nc - number of components per grid point
2002      col - number of colors needed in one direction for single component problem
2003   */
2004   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
2005   col  = 2*s + 1;
2006 
2007   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
2008   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
2009   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
2010 
2011   ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr);
2012 
2013   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
2014 
2015   /* determine the matrix preallocation information */
2016   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
2017   for (i=xs; i<xs+nx; i++) {
2018     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2019     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2020     for (j=ys; j<ys+ny; j++) {
2021       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2022       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2023       slot   = i - gxs + gnx*(j - gys);
2024 
2025       /* Find block columns in block row */
2026       cnt = 0;
2027       for (ii=istart; ii<iend+1; ii++) {
2028         for (jj=jstart; jj<jend+1; jj++) {
2029           if (st == DMDA_STENCIL_BOX || !ii || !jj) {
2030             cols[cnt++] = slot + ii + gnx*jj;
2031           }
2032         }
2033       }
2034       ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2035       ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
2036     }
2037   }
2038   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
2039   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
2040   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2041 
2042   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
2043 
2044   /*
2045     For each node in the grid: we get the neighbors in the local (on processor ordering
2046     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2047     PETSc ordering.
2048   */
2049   if (!da->prealloc_only) {
2050     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
2051     for (i=xs; i<xs+nx; i++) {
2052       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2053       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2054       for (j=ys; j<ys+ny; j++) {
2055         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2056         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2057         slot   = i - gxs + gnx*(j - gys);
2058 
2059         /* Find block columns in block row */
2060         cnt = 0;
2061         for (ii=istart; ii<iend+1; ii++) {
2062           for (jj=jstart; jj<jend+1; jj++) {
2063             if (st == DMDA_STENCIL_BOX || !ii || !jj) {
2064               cols[cnt++] = slot + ii + gnx*jj;
2065             }
2066           }
2067         }
2068         ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2069         ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2070       }
2071     }
2072     ierr = PetscFree(values);CHKERRQ(ierr);
2073     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2074     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2075     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2076   }
2077   ierr = PetscFree(cols);CHKERRQ(ierr);
2078   PetscFunctionReturn(0);
2079 }
2080 
2081 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J)
2082 {
2083   PetscErrorCode         ierr;
2084   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2085   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
2086   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
2087   MPI_Comm               comm;
2088   PetscScalar            *values;
2089   DMBoundaryType         bx,by,bz;
2090   DMDAStencilType        st;
2091   ISLocalToGlobalMapping ltog;
2092 
2093   PetscFunctionBegin;
2094   /*
2095      nc - number of components per grid point
2096      col - number of colors needed in one direction for single component problem
2097   */
2098   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
2099   col  = 2*s + 1;
2100 
2101   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
2102   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
2103   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
2104 
2105   /* create the matrix */
2106   ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr);
2107 
2108   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
2109 
2110   /* determine the matrix preallocation information */
2111   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
2112   for (i=xs; i<xs+nx; i++) {
2113     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2114     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2115     for (j=ys; j<ys+ny; j++) {
2116       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2117       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2118       for (k=zs; k<zs+nz; k++) {
2119         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2120         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2121 
2122         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2123 
2124         /* Find block columns in block row */
2125         cnt = 0;
2126         for (ii=istart; ii<iend+1; ii++) {
2127           for (jj=jstart; jj<jend+1; jj++) {
2128             for (kk=kstart; kk<kend+1; kk++) {
2129               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
2130                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
2131               }
2132             }
2133           }
2134         }
2135         ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2136         ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
2137       }
2138     }
2139   }
2140   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
2141   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
2142   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2143 
2144   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
2145 
2146   /*
2147     For each node in the grid: we get the neighbors in the local (on processor ordering
2148     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2149     PETSc ordering.
2150   */
2151   if (!da->prealloc_only) {
2152     ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr);
2153     for (i=xs; i<xs+nx; i++) {
2154       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2155       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2156       for (j=ys; j<ys+ny; j++) {
2157         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2158         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2159         for (k=zs; k<zs+nz; k++) {
2160           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2161           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2162 
2163           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2164 
2165           cnt = 0;
2166           for (ii=istart; ii<iend+1; ii++) {
2167             for (jj=jstart; jj<jend+1; jj++) {
2168               for (kk=kstart; kk<kend+1; kk++) {
2169                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
2170                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
2171                 }
2172               }
2173             }
2174           }
2175           ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2176           ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2177         }
2178       }
2179     }
2180     ierr = PetscFree(values);CHKERRQ(ierr);
2181     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2182     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2183     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2184   }
2185   ierr = PetscFree(cols);CHKERRQ(ierr);
2186   PetscFunctionReturn(0);
2187 }
2188 
2189 /* ---------------------------------------------------------------------------------*/
2190 
2191 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J)
2192 {
2193   PetscErrorCode         ierr;
2194   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2195   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt, maxcnt = 0,l,p,*dnz,*onz;
2196   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
2197   DM_DA                  *dd = (DM_DA*)da->data;
2198   PetscInt               ifill_col,*dfill = dd->dfill,*ofill = dd->ofill;
2199   MPI_Comm               comm;
2200   PetscScalar            *values;
2201   DMBoundaryType         bx,by,bz;
2202   ISLocalToGlobalMapping ltog;
2203   DMDAStencilType        st;
2204   PetscBool              removedups = PETSC_FALSE;
2205 
2206   PetscFunctionBegin;
2207   /*
2208          nc - number of components per grid point
2209          col - number of colors needed in one direction for single component problem
2210 
2211   */
2212   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
2213   col  = 2*s + 1;
2214   if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
2215                  by 2*stencil_width + 1\n");
2216   if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
2217                  by 2*stencil_width + 1\n");
2218   if (bz == DM_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
2219                  by 2*stencil_width + 1\n");
2220 
2221   /*
2222        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
2223        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
2224   */
2225   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
2226   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
2227   if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;
2228 
2229   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
2230   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
2231   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
2232 
2233   ierr = PetscMalloc1(col*col*col*nc,&cols);CHKERRQ(ierr);
2234   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
2235 
2236   /* determine the matrix preallocation information */
2237   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
2238 
2239   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
2240   for (i=xs; i<xs+nx; i++) {
2241     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2242     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2243     for (j=ys; j<ys+ny; j++) {
2244       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2245       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2246       for (k=zs; k<zs+nz; k++) {
2247         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2248         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2249 
2250         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2251 
2252         for (l=0; l<nc; l++) {
2253           cnt = 0;
2254           for (ii=istart; ii<iend+1; ii++) {
2255             for (jj=jstart; jj<jend+1; jj++) {
2256               for (kk=kstart; kk<kend+1; kk++) {
2257                 if (ii || jj || kk) {
2258                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2259                     for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2260                   }
2261                 } else {
2262                   if (dfill) {
2263                     for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2264                   } else {
2265                     for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2266                   }
2267                 }
2268               }
2269             }
2270           }
2271           row  = l + nc*(slot);
2272           maxcnt = PetscMax(maxcnt,cnt);
2273           if (removedups) {
2274             ierr = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
2275           } else {
2276             ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
2277           }
2278         }
2279       }
2280     }
2281   }
2282   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
2283   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
2284   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2285   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
2286 
2287   /*
2288     For each node in the grid: we get the neighbors in the local (on processor ordering
2289     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2290     PETSc ordering.
2291   */
2292   if (!da->prealloc_only) {
2293     ierr = PetscCalloc1(maxcnt,&values);CHKERRQ(ierr);
2294     for (i=xs; i<xs+nx; i++) {
2295       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2296       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2297       for (j=ys; j<ys+ny; j++) {
2298         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2299         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2300         for (k=zs; k<zs+nz; k++) {
2301           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2302           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2303 
2304           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2305 
2306           for (l=0; l<nc; l++) {
2307             cnt = 0;
2308             for (ii=istart; ii<iend+1; ii++) {
2309               for (jj=jstart; jj<jend+1; jj++) {
2310                 for (kk=kstart; kk<kend+1; kk++) {
2311                   if (ii || jj || kk) {
2312                     if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2313                       for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2314                     }
2315                   } else {
2316                     if (dfill) {
2317                       for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2318                     } else {
2319                       for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2320                     }
2321                   }
2322                 }
2323               }
2324             }
2325             row  = l + nc*(slot);
2326             ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2327           }
2328         }
2329       }
2330     }
2331     ierr = PetscFree(values);CHKERRQ(ierr);
2332     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2333     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2334     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2335   }
2336   ierr = PetscFree(cols);CHKERRQ(ierr);
2337   PetscFunctionReturn(0);
2338 }
2339