xref: /petsc/src/dm/impls/da/fdda.c (revision 487a658c8b32ba712a1dc8280daad2fd70c1dcd9)
1 
2 #include <petsc/private/dmdaimpl.h> /*I      "petscdmda.h"     I*/
3 #include <petscmat.h>
4 
5 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring*);
6 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring*);
7 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring*);
8 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring*);
9 
10 /*
11    For ghost i that may be negative or greater than the upper bound this
12   maps it into the 0:m-1 range using periodicity
13 */
14 #define SetInRange(i,m) ((i < 0) ? m+i : ((i >= m) ? i-m : i))
15 
16 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill,PetscInt w,PetscInt **rfill)
17 {
18   PetscErrorCode ierr;
19   PetscInt       i,j,nz,*fill;
20 
21   PetscFunctionBegin;
22   if (!dfill) PetscFunctionReturn(0);
23 
24   /* count number nonzeros */
25   nz = 0;
26   for (i=0; i<w; i++) {
27     for (j=0; j<w; j++) {
28       if (dfill[w*i+j]) nz++;
29     }
30   }
31   ierr = PetscMalloc1(nz + w + 1,&fill);CHKERRQ(ierr);
32   /* construct modified CSR storage of nonzero structure */
33   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
34    so fill[1] - fill[0] gives number of nonzeros in first row etc */
35   nz = w + 1;
36   for (i=0; i<w; i++) {
37     fill[i] = nz;
38     for (j=0; j<w; j++) {
39       if (dfill[w*i+j]) {
40         fill[nz] = j;
41         nz++;
42       }
43     }
44   }
45   fill[w] = nz;
46 
47   *rfill = fill;
48   PetscFunctionReturn(0);
49 }
50 
51 
52 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse,PetscInt w,PetscInt **rfill)
53 {
54   PetscErrorCode ierr;
55   PetscInt       i,nz,*fill;
56 
57   PetscFunctionBegin;
58   if (!dfillsparse) PetscFunctionReturn(0);
59 
60   /* Determine number of non-zeros */
61   nz = (dfillsparse[w] - w - 1);
62 
63   /* Allocate space for our copy of the given sparse matrix representation. */
64   ierr = PetscMalloc1(nz + w + 1,&fill);CHKERRQ(ierr);
65 
66   /* Copy the given sparse matrix representation. */
67   for(i = 0; i < (nz + w + 1); ++i)
68   {
69     fill[i] = dfillsparse[i];
70   }
71 
72   *rfill = fill;
73   PetscFunctionReturn(0);
74 }
75 
76 
77 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
78 {
79   PetscErrorCode ierr;
80   PetscInt       i,k,cnt = 1;
81 
82   PetscFunctionBegin;
83 
84   /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of
85    columns to the left with any nonzeros in them plus 1 */
86   ierr = PetscCalloc1(dd->w,&dd->ofillcols);CHKERRQ(ierr);
87   for (i=0; i<dd->w; i++) {
88     for (k=dd->ofill[i]; k<dd->ofill[i+1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
89   }
90   for (i=0; i<dd->w; i++) {
91     if (dd->ofillcols[i]) {
92       dd->ofillcols[i] = cnt++;
93     }
94   }
95   PetscFunctionReturn(0);
96 }
97 
98 
99 
100 /*@
101     DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
102     of the matrix returned by DMCreateMatrix().
103 
104     Logically Collective on DMDA
105 
106     Input Parameter:
107 +   da - the distributed array
108 .   dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
109 -   ofill - the fill pattern in the off-diagonal blocks
110 
111 
112     Level: developer
113 
114     Notes:
115     This only makes sense when you are doing multicomponent problems but using the
116        MPIAIJ matrix format
117 
118            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
119        representing coupling and 0 entries for missing coupling. For example
120 $             dfill[9] = {1, 0, 0,
121 $                         1, 1, 0,
122 $                         0, 1, 1}
123        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
124        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
125        diagonal block).
126 
127      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
128      can be represented in the dfill, ofill format
129 
130    Contributed by Glenn Hammond
131 
132 .seealso DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()
133 
134 @*/
135 PetscErrorCode  DMDASetBlockFills(DM da,const PetscInt *dfill,const PetscInt *ofill)
136 {
137   DM_DA          *dd = (DM_DA*)da->data;
138   PetscErrorCode ierr;
139 
140   PetscFunctionBegin;
141   /* save the given dfill and ofill information */
142   ierr = DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);CHKERRQ(ierr);
143   ierr = DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);CHKERRQ(ierr);
144 
145   /* count nonzeros in ofill columns */
146   ierr = DMDASetBlockFills_Private2(dd);CHKERRQ(ierr);
147 
148   PetscFunctionReturn(0);
149 }
150 
151 
152 /*@
153     DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
154     of the matrix returned by DMCreateMatrix(), using sparse representations
155     of fill patterns.
156 
157     Logically Collective on DMDA
158 
159     Input Parameter:
160 +   da - the distributed array
161 .   dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block)
162 -   ofill - the sparse fill pattern in the off-diagonal blocks
163 
164 
165     Level: developer
166 
167     Notes: This only makes sense when you are doing multicomponent problems but using the
168        MPIAIJ matrix format
169 
170            The format for dfill and ofill is a sparse representation of a
171            dof-by-dof matrix with 1 entries representing coupling and 0 entries
172            for missing coupling.  The sparse representation is a 1 dimensional
173            array of length nz + dof + 1, where nz is the number of non-zeros in
174            the matrix.  The first dof entries in the array give the
175            starting array indices of each row's items in the rest of the array,
176            the dof+1st item indicates the total number of nonzeros,
177            and the remaining nz items give the column indices of each of
178            the 1s within the logical 2D matrix.  Each row's items within
179            the array are the column indices of the 1s within that row
180            of the 2D matrix.  PETSc developers may recognize that this is the
181            same format as that computed by the DMDASetBlockFills_Private()
182            function from a dense 2D matrix representation.
183 
184      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
185      can be represented in the dfill, ofill format
186 
187    Contributed by Philip C. Roth
188 
189 .seealso DMDASetBlockFills(), DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()
190 
191 @*/
192 PetscErrorCode  DMDASetBlockFillsSparse(DM da,const PetscInt *dfillsparse,const PetscInt *ofillsparse)
193 {
194   DM_DA          *dd = (DM_DA*)da->data;
195   PetscErrorCode ierr;
196 
197   PetscFunctionBegin;
198   /* save the given dfill and ofill information */
199   ierr = DMDASetBlockFillsSparse_Private(dfillsparse,dd->w,&dd->dfill);CHKERRQ(ierr);
200   ierr = DMDASetBlockFillsSparse_Private(ofillsparse,dd->w,&dd->ofill);CHKERRQ(ierr);
201 
202   /* count nonzeros in ofill columns */
203   ierr = DMDASetBlockFills_Private2(dd);CHKERRQ(ierr);
204 
205   PetscFunctionReturn(0);
206 }
207 
208 
209 PetscErrorCode  DMCreateColoring_DA(DM da,ISColoringType ctype,ISColoring *coloring)
210 {
211   PetscErrorCode   ierr;
212   PetscInt         dim,m,n,p,nc;
213   DMBoundaryType bx,by,bz;
214   MPI_Comm         comm;
215   PetscMPIInt      size;
216   PetscBool        isBAIJ;
217   DM_DA            *dd = (DM_DA*)da->data;
218 
219   PetscFunctionBegin;
220   /*
221                                   m
222           ------------------------------------------------------
223          |                                                     |
224          |                                                     |
225          |               ----------------------                |
226          |               |                    |                |
227       n  |           yn  |                    |                |
228          |               |                    |                |
229          |               .---------------------                |
230          |             (xs,ys)     xn                          |
231          |            .                                        |
232          |         (gxs,gys)                                   |
233          |                                                     |
234           -----------------------------------------------------
235   */
236 
237   /*
238          nc - number of components per grid point
239          col - number of colors needed in one direction for single component problem
240 
241   */
242   ierr = DMDAGetInfo(da,&dim,0,0,0,&m,&n,&p,&nc,0,&bx,&by,&bz,0);CHKERRQ(ierr);
243 
244   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
245   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
246   if (ctype == IS_COLORING_LOCAL) {
247     if (size == 1) {
248       ctype = IS_COLORING_GLOBAL;
249     } else if (dim > 1) {
250       if ((m==1 && bx == DM_BOUNDARY_PERIODIC) || (n==1 && by == DM_BOUNDARY_PERIODIC) || (p==1 && bz == DM_BOUNDARY_PERIODIC)) {
251         SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain  on the same process");
252       }
253     }
254   }
255 
256   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
257      matrices is for the blocks, not the individual matrix elements  */
258   ierr = PetscStrcmp(da->mattype,MATBAIJ,&isBAIJ);CHKERRQ(ierr);
259   if (!isBAIJ) {ierr = PetscStrcmp(da->mattype,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);}
260   if (!isBAIJ) {ierr = PetscStrcmp(da->mattype,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);}
261   if (isBAIJ) {
262     dd->w  = 1;
263     dd->xs = dd->xs/nc;
264     dd->xe = dd->xe/nc;
265     dd->Xs = dd->Xs/nc;
266     dd->Xe = dd->Xe/nc;
267   }
268 
269   /*
270      We do not provide a getcoloring function in the DMDA operations because
271    the basic DMDA does not know about matrices. We think of DMDA as being more
272    more low-level then matrices.
273   */
274   if (dim == 1) {
275     ierr = DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
276   } else if (dim == 2) {
277     ierr =  DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
278   } else if (dim == 3) {
279     ierr =  DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
280   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
281   if (isBAIJ) {
282     dd->w  = nc;
283     dd->xs = dd->xs*nc;
284     dd->xe = dd->xe*nc;
285     dd->Xs = dd->Xs*nc;
286     dd->Xe = dd->Xe*nc;
287   }
288   PetscFunctionReturn(0);
289 }
290 
291 /* ---------------------------------------------------------------------------------*/
292 
293 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
294 {
295   PetscErrorCode   ierr;
296   PetscInt         xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
297   PetscInt         ncolors;
298   MPI_Comm         comm;
299   DMBoundaryType bx,by;
300   DMDAStencilType  st;
301   ISColoringValue  *colors;
302   DM_DA            *dd = (DM_DA*)da->data;
303 
304   PetscFunctionBegin;
305   /*
306          nc - number of components per grid point
307          col - number of colors needed in one direction for single component problem
308 
309   */
310   ierr = DMDAGetInfo(da,&dim,&m,&n,0,&M,&N,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
311   col  = 2*s + 1;
312   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
313   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
314   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
315 
316   /* special case as taught to us by Paul Hovland */
317   if (st == DMDA_STENCIL_STAR && s == 1) {
318     ierr = DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
319   } else {
320 
321     if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X (%d) is divisible\n\
322                                                             by 2*stencil_width + 1 (%d)\n", m, col);
323     if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y (%d) is divisible\n\
324                                                             by 2*stencil_width + 1 (%d)\n", n, col);
325     if (ctype == IS_COLORING_GLOBAL) {
326       if (!dd->localcoloring) {
327         ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr);
328         ii   = 0;
329         for (j=ys; j<ys+ny; j++) {
330           for (i=xs; i<xs+nx; i++) {
331             for (k=0; k<nc; k++) {
332               colors[ii++] = k + nc*((i % col) + col*(j % col));
333             }
334           }
335         }
336         ncolors = nc + nc*(col-1 + col*(col-1));
337         ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
338       }
339       *coloring = dd->localcoloring;
340     } else if (ctype == IS_COLORING_LOCAL) {
341       if (!dd->ghostedcoloring) {
342         ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr);
343         ii   = 0;
344         for (j=gys; j<gys+gny; j++) {
345           for (i=gxs; i<gxs+gnx; i++) {
346             for (k=0; k<nc; k++) {
347               /* the complicated stuff is to handle periodic boundaries */
348               colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
349             }
350           }
351         }
352         ncolors = nc + nc*(col - 1 + col*(col-1));
353         ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
354         /* PetscIntView(ncolors,(PetscInt*)colors,0); */
355 
356         ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
357       }
358       *coloring = dd->ghostedcoloring;
359     } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
360   }
361   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
362   PetscFunctionReturn(0);
363 }
364 
365 /* ---------------------------------------------------------------------------------*/
366 
367 PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
368 {
369   PetscErrorCode   ierr;
370   PetscInt         xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
371   PetscInt         ncolors;
372   MPI_Comm         comm;
373   DMBoundaryType bx,by,bz;
374   DMDAStencilType  st;
375   ISColoringValue  *colors;
376   DM_DA            *dd = (DM_DA*)da->data;
377 
378   PetscFunctionBegin;
379   /*
380          nc - number of components per grid point
381          col - number of colors needed in one direction for single component problem
382 
383   */
384   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
385   col  = 2*s + 1;
386   if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
387                                                          by 2*stencil_width + 1\n");
388   if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
389                                                          by 2*stencil_width + 1\n");
390   if (bz == DM_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
391                                                          by 2*stencil_width + 1\n");
392 
393   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
394   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
395   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
396 
397   /* create the coloring */
398   if (ctype == IS_COLORING_GLOBAL) {
399     if (!dd->localcoloring) {
400       ierr = PetscMalloc1(nc*nx*ny*nz,&colors);CHKERRQ(ierr);
401       ii   = 0;
402       for (k=zs; k<zs+nz; k++) {
403         for (j=ys; j<ys+ny; j++) {
404           for (i=xs; i<xs+nx; i++) {
405             for (l=0; l<nc; l++) {
406               colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
407             }
408           }
409         }
410       }
411       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
412       ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
413     }
414     *coloring = dd->localcoloring;
415   } else if (ctype == IS_COLORING_LOCAL) {
416     if (!dd->ghostedcoloring) {
417       ierr = PetscMalloc1(nc*gnx*gny*gnz,&colors);CHKERRQ(ierr);
418       ii   = 0;
419       for (k=gzs; k<gzs+gnz; k++) {
420         for (j=gys; j<gys+gny; j++) {
421           for (i=gxs; i<gxs+gnx; i++) {
422             for (l=0; l<nc; l++) {
423               /* the complicated stuff is to handle periodic boundaries */
424               colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
425             }
426           }
427         }
428       }
429       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
430       ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
431       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
432     }
433     *coloring = dd->ghostedcoloring;
434   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
435   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
436   PetscFunctionReturn(0);
437 }
438 
439 /* ---------------------------------------------------------------------------------*/
440 
441 PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
442 {
443   PetscErrorCode   ierr;
444   PetscInt         xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
445   PetscInt         ncolors;
446   MPI_Comm         comm;
447   DMBoundaryType bx;
448   ISColoringValue  *colors;
449   DM_DA            *dd = (DM_DA*)da->data;
450 
451   PetscFunctionBegin;
452   /*
453          nc - number of components per grid point
454          col - number of colors needed in one direction for single component problem
455 
456   */
457   ierr = DMDAGetInfo(da,&dim,&m,0,0,&M,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
458   col  = 2*s + 1;
459 
460   if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points %d is divisible\n\
461                                                           by 2*stencil_width + 1 %d\n",(int)m,(int)col);
462 
463   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
464   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
465   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
466 
467   /* create the coloring */
468   if (ctype == IS_COLORING_GLOBAL) {
469     if (!dd->localcoloring) {
470       ierr = PetscMalloc1(nc*nx,&colors);CHKERRQ(ierr);
471       if (dd->ofillcols) {
472         PetscInt tc = 0;
473         for (i=0; i<nc; i++) tc += (PetscInt) (dd->ofillcols[i] > 0);
474         i1 = 0;
475         for (i=xs; i<xs+nx; i++) {
476           for (l=0; l<nc; l++) {
477             if (dd->ofillcols[l] && (i % col)) {
478               colors[i1++] =  nc - 1 + tc*((i % col) - 1) + dd->ofillcols[l];
479             } else {
480               colors[i1++] = l;
481             }
482           }
483         }
484         ncolors = nc + 2*s*tc;
485       } else {
486         i1 = 0;
487         for (i=xs; i<xs+nx; i++) {
488           for (l=0; l<nc; l++) {
489             colors[i1++] = l + nc*(i % col);
490           }
491         }
492         ncolors = nc + nc*(col-1);
493       }
494       ierr = ISColoringCreate(comm,ncolors,nc*nx,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
495     }
496     *coloring = dd->localcoloring;
497   } else if (ctype == IS_COLORING_LOCAL) {
498     if (!dd->ghostedcoloring) {
499       ierr = PetscMalloc1(nc*gnx,&colors);CHKERRQ(ierr);
500       i1   = 0;
501       for (i=gxs; i<gxs+gnx; i++) {
502         for (l=0; l<nc; l++) {
503           /* the complicated stuff is to handle periodic boundaries */
504           colors[i1++] = l + nc*(SetInRange(i,m) % col);
505         }
506       }
507       ncolors = nc + nc*(col-1);
508       ierr    = ISColoringCreate(comm,ncolors,nc*gnx,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
509       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
510     }
511     *coloring = dd->ghostedcoloring;
512   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
513   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
514   PetscFunctionReturn(0);
515 }
516 
517 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
518 {
519   PetscErrorCode   ierr;
520   PetscInt         xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
521   PetscInt         ncolors;
522   MPI_Comm         comm;
523   DMBoundaryType bx,by;
524   ISColoringValue  *colors;
525   DM_DA            *dd = (DM_DA*)da->data;
526 
527   PetscFunctionBegin;
528   /*
529          nc - number of components per grid point
530          col - number of colors needed in one direction for single component problem
531 
532   */
533   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,0);CHKERRQ(ierr);
534   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
535   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
536   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
537 
538   if (bx == DM_BOUNDARY_PERIODIC && (m % 5)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible by 5\n");
539   if (by == DM_BOUNDARY_PERIODIC && (n % 5)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible by 5\n");
540 
541   /* create the coloring */
542   if (ctype == IS_COLORING_GLOBAL) {
543     if (!dd->localcoloring) {
544       ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr);
545       ii   = 0;
546       for (j=ys; j<ys+ny; j++) {
547         for (i=xs; i<xs+nx; i++) {
548           for (k=0; k<nc; k++) {
549             colors[ii++] = k + nc*((3*j+i) % 5);
550           }
551         }
552       }
553       ncolors = 5*nc;
554       ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
555     }
556     *coloring = dd->localcoloring;
557   } else if (ctype == IS_COLORING_LOCAL) {
558     if (!dd->ghostedcoloring) {
559       ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr);
560       ii = 0;
561       for (j=gys; j<gys+gny; j++) {
562         for (i=gxs; i<gxs+gnx; i++) {
563           for (k=0; k<nc; k++) {
564             colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
565           }
566         }
567       }
568       ncolors = 5*nc;
569       ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
570       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
571     }
572     *coloring = dd->ghostedcoloring;
573   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
574   PetscFunctionReturn(0);
575 }
576 
577 /* =========================================================================== */
578 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat);
579 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM,Mat);
580 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat);
581 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat);
582 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat);
583 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat);
584 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat);
585 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat);
586 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat);
587 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat);
588 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM,Mat);
589 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM,Mat);
590 extern PetscErrorCode DMCreateMatrix_DA_IS(DM,Mat);
591 
592 /*@C
593    MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix
594 
595    Logically Collective on Mat
596 
597    Input Parameters:
598 +  mat - the matrix
599 -  da - the da
600 
601    Level: intermediate
602 
603 @*/
604 PetscErrorCode MatSetupDM(Mat mat,DM da)
605 {
606   PetscErrorCode ierr;
607 
608   PetscFunctionBegin;
609   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
610   PetscValidHeaderSpecificType(da,DM_CLASSID,1,DMDA);
611   ierr = PetscTryMethod(mat,"MatSetupDM_C",(Mat,DM),(mat,da));CHKERRQ(ierr);
612   PetscFunctionReturn(0);
613 }
614 
615 PetscErrorCode  MatView_MPI_DA(Mat A,PetscViewer viewer)
616 {
617   DM                da;
618   PetscErrorCode    ierr;
619   const char        *prefix;
620   Mat               Anatural;
621   AO                ao;
622   PetscInt          rstart,rend,*petsc,i;
623   IS                is;
624   MPI_Comm          comm;
625   PetscViewerFormat format;
626 
627   PetscFunctionBegin;
628   /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
629   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
630   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0);
631 
632   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
633   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
634   if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
635 
636   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
637   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
638   ierr = PetscMalloc1(rend-rstart,&petsc);CHKERRQ(ierr);
639   for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
640   ierr = AOApplicationToPetsc(ao,rend-rstart,petsc);CHKERRQ(ierr);
641   ierr = ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
642 
643   /* call viewer on natural ordering */
644   ierr = MatCreateSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);CHKERRQ(ierr);
645   ierr = ISDestroy(&is);CHKERRQ(ierr);
646   ierr = PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);CHKERRQ(ierr);
647   ierr = PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);CHKERRQ(ierr);
648   ierr = PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);CHKERRQ(ierr);
649   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
650   ierr = MatView(Anatural,viewer);CHKERRQ(ierr);
651   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
652   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
653   PetscFunctionReturn(0);
654 }
655 
656 PetscErrorCode  MatLoad_MPI_DA(Mat A,PetscViewer viewer)
657 {
658   DM             da;
659   PetscErrorCode ierr;
660   Mat            Anatural,Aapp;
661   AO             ao;
662   PetscInt       rstart,rend,*app,i,m,n,M,N;
663   IS             is;
664   MPI_Comm       comm;
665 
666   PetscFunctionBegin;
667   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
668   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
669   if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
670 
671   /* Load the matrix in natural ordering */
672   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Anatural);CHKERRQ(ierr);
673   ierr = MatSetType(Anatural,((PetscObject)A)->type_name);CHKERRQ(ierr);
674   ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr);
675   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
676   ierr = MatSetSizes(Anatural,m,n,M,N);CHKERRQ(ierr);
677   ierr = MatLoad(Anatural,viewer);CHKERRQ(ierr);
678 
679   /* Map natural ordering to application ordering and create IS */
680   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
681   ierr = MatGetOwnershipRange(Anatural,&rstart,&rend);CHKERRQ(ierr);
682   ierr = PetscMalloc1(rend-rstart,&app);CHKERRQ(ierr);
683   for (i=rstart; i<rend; i++) app[i-rstart] = i;
684   ierr = AOPetscToApplication(ao,rend-rstart,app);CHKERRQ(ierr);
685   ierr = ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
686 
687   /* Do permutation and replace header */
688   ierr = MatCreateSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);CHKERRQ(ierr);
689   ierr = MatHeaderReplace(A,&Aapp);CHKERRQ(ierr);
690   ierr = ISDestroy(&is);CHKERRQ(ierr);
691   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
692   PetscFunctionReturn(0);
693 }
694 
695 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
696 {
697   PetscErrorCode ierr;
698   PetscInt       dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
699   Mat            A;
700   MPI_Comm       comm;
701   MatType        Atype;
702   PetscSection   section, sectionGlobal;
703   void           (*aij)(void)=NULL,(*baij)(void)=NULL,(*sbaij)(void)=NULL,(*sell)(void)=NULL,(*is)(void)=NULL;
704   MatType        mtype;
705   PetscMPIInt    size;
706   DM_DA          *dd = (DM_DA*)da->data;
707 
708   PetscFunctionBegin;
709   ierr = MatInitializePackage();CHKERRQ(ierr);
710   mtype = da->mattype;
711 
712   ierr = DMGetSection(da, &section);CHKERRQ(ierr);
713   if (section) {
714     PetscInt  bs = -1;
715     PetscInt  localSize;
716     PetscBool isShell, isBlock, isSeqBlock, isMPIBlock, isSymBlock, isSymSeqBlock, isSymMPIBlock, isSymmetric;
717 
718     ierr = DMGetGlobalSection(da, &sectionGlobal);CHKERRQ(ierr);
719     ierr = PetscSectionGetConstrainedStorageSize(sectionGlobal, &localSize);CHKERRQ(ierr);
720     ierr = MatCreate(PetscObjectComm((PetscObject)da),&A);CHKERRQ(ierr);
721     ierr = MatSetSizes(A,localSize,localSize,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
722     ierr = MatSetType(A,mtype);CHKERRQ(ierr);
723     ierr = PetscStrcmp(mtype,MATSHELL,&isShell);CHKERRQ(ierr);
724     ierr = PetscStrcmp(mtype,MATBAIJ,&isBlock);CHKERRQ(ierr);
725     ierr = PetscStrcmp(mtype,MATSEQBAIJ,&isSeqBlock);CHKERRQ(ierr);
726     ierr = PetscStrcmp(mtype,MATMPIBAIJ,&isMPIBlock);CHKERRQ(ierr);
727     ierr = PetscStrcmp(mtype,MATSBAIJ,&isSymBlock);CHKERRQ(ierr);
728     ierr = PetscStrcmp(mtype,MATSEQSBAIJ,&isSymSeqBlock);CHKERRQ(ierr);
729     ierr = PetscStrcmp(mtype,MATMPISBAIJ,&isSymMPIBlock);CHKERRQ(ierr);
730     /* Check for symmetric storage */
731     isSymmetric = (PetscBool) (isSymBlock || isSymSeqBlock || isSymMPIBlock);
732     if (isSymmetric) {
733       ierr = MatSetOption(*J, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE);CHKERRQ(ierr);
734     }
735     if (!isShell) {
736       PetscInt *dnz, *onz, *dnzu, *onzu, bsLocal;
737 
738       if (bs < 0) {
739         if (isBlock || isSeqBlock || isMPIBlock || isSymBlock || isSymSeqBlock || isSymMPIBlock) {
740           PetscInt pStart, pEnd, p, dof;
741 
742           ierr = PetscSectionGetChart(sectionGlobal, &pStart, &pEnd);CHKERRQ(ierr);
743           for (p = pStart; p < pEnd; ++p) {
744             ierr = PetscSectionGetDof(sectionGlobal, p, &dof);CHKERRQ(ierr);
745             if (dof) {
746               bs = dof;
747               break;
748             }
749           }
750         } else {
751           bs = 1;
752         }
753         /* Must have same blocksize on all procs (some might have no points) */
754         bsLocal = bs;
755         ierr    = MPIU_Allreduce(&bsLocal, &bs, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)da));CHKERRQ(ierr);
756       }
757       ierr = PetscCalloc4(localSize/bs, &dnz, localSize/bs, &onz, localSize/bs, &dnzu, localSize/bs, &onzu);CHKERRQ(ierr);
758       /* ierr = DMPlexPreallocateOperator(dm, bs, section, sectionGlobal, dnz, onz, dnzu, onzu, *J, fillMatrix);CHKERRQ(ierr); */
759       ierr = PetscFree4(dnz, onz, dnzu, onzu);CHKERRQ(ierr);
760     }
761   }
762   /*
763                                   m
764           ------------------------------------------------------
765          |                                                     |
766          |                                                     |
767          |               ----------------------                |
768          |               |                    |                |
769       n  |           ny  |                    |                |
770          |               |                    |                |
771          |               .---------------------                |
772          |             (xs,ys)     nx                          |
773          |            .                                        |
774          |         (gxs,gys)                                   |
775          |                                                     |
776           -----------------------------------------------------
777   */
778 
779   /*
780          nc - number of components per grid point
781          col - number of colors needed in one direction for single component problem
782 
783   */
784   M   = dd->M;
785   N   = dd->N;
786   P   = dd->P;
787   dim = da->dim;
788   dof = dd->w;
789   /* ierr = DMDAGetInfo(da,&dim,&M,&N,&P,0,0,0,&dof,0,0,0,0,0);CHKERRQ(ierr); */
790   ierr = DMDAGetCorners(da,0,0,0,&nx,&ny,&nz);CHKERRQ(ierr);
791   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
792   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
793   ierr = MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);CHKERRQ(ierr);
794   ierr = MatSetType(A,mtype);CHKERRQ(ierr);
795   ierr = MatSetDM(A,da);CHKERRQ(ierr);
796   if (da->structure_only) {
797     ierr = MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE);CHKERRQ(ierr);
798   }
799   ierr = MatGetType(A,&Atype);CHKERRQ(ierr);
800   /*
801      We do not provide a getmatrix function in the DMDA operations because
802    the basic DMDA does not know about matrices. We think of DMDA as being more
803    more low-level than matrices. This is kind of cheating but, cause sometimes
804    we think of DMDA has higher level than matrices.
805 
806      We could switch based on Atype (or mtype), but we do not since the
807    specialized setting routines depend only on the particular preallocation
808    details of the matrix, not the type itself.
809   */
810   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
811   if (!aij) {
812     ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
813   }
814   if (!aij) {
815     ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
816     if (!baij) {
817       ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
818     }
819     if (!baij) {
820       ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
821       if (!sbaij) {
822         ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
823       }
824       if (!sbaij) {
825         ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISELLSetPreallocation_C",&sell);CHKERRQ(ierr);
826         if (!sell) {
827           ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",&sell);CHKERRQ(ierr);
828         }
829       }
830       if (!sell) {
831         ierr = PetscObjectQueryFunction((PetscObject)A,"MatISSetPreallocation_C",&is);CHKERRQ(ierr);
832       }
833     }
834   }
835   if (aij) {
836     if (dim == 1) {
837       if (dd->ofill) {
838         ierr = DMCreateMatrix_DA_1d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
839       } else {
840         ierr = DMCreateMatrix_DA_1d_MPIAIJ(da,A);CHKERRQ(ierr);
841       }
842     } else if (dim == 2) {
843       if (dd->ofill) {
844         ierr = DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
845       } else {
846         ierr = DMCreateMatrix_DA_2d_MPIAIJ(da,A);CHKERRQ(ierr);
847       }
848     } else if (dim == 3) {
849       if (dd->ofill) {
850         ierr = DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
851       } else {
852         ierr = DMCreateMatrix_DA_3d_MPIAIJ(da,A);CHKERRQ(ierr);
853       }
854     }
855   } else if (baij) {
856     if (dim == 2) {
857       ierr = DMCreateMatrix_DA_2d_MPIBAIJ(da,A);CHKERRQ(ierr);
858     } else if (dim == 3) {
859       ierr = DMCreateMatrix_DA_3d_MPIBAIJ(da,A);CHKERRQ(ierr);
860     } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
861   } else if (sbaij) {
862     if (dim == 2) {
863       ierr = DMCreateMatrix_DA_2d_MPISBAIJ(da,A);CHKERRQ(ierr);
864     } else if (dim == 3) {
865       ierr = DMCreateMatrix_DA_3d_MPISBAIJ(da,A);CHKERRQ(ierr);
866     } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
867   } else if (sell) {
868      if (dim == 2) {
869        ierr = DMCreateMatrix_DA_2d_MPISELL(da,A);CHKERRQ(ierr);
870      } else if (dim == 3) {
871        ierr = DMCreateMatrix_DA_3d_MPISELL(da,A);CHKERRQ(ierr);
872      } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
873   } else if (is) {
874     ierr = DMCreateMatrix_DA_IS(da,A);CHKERRQ(ierr);
875   } else {
876     ISLocalToGlobalMapping ltog;
877 
878     ierr = MatSetBlockSize(A,dof);CHKERRQ(ierr);
879     ierr = MatSetUp(A);CHKERRQ(ierr);
880     ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
881     ierr = MatSetLocalToGlobalMapping(A,ltog,ltog);CHKERRQ(ierr);
882   }
883   ierr = DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);CHKERRQ(ierr);
884   ierr = MatSetStencil(A,dim,dims,starts,dof);CHKERRQ(ierr);
885   ierr = MatSetDM(A,da);CHKERRQ(ierr);
886   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
887   if (size > 1) {
888     /* change viewer to display matrix in natural ordering */
889     ierr = MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA);CHKERRQ(ierr);
890     ierr = MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA);CHKERRQ(ierr);
891   }
892   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
893   *J = A;
894   PetscFunctionReturn(0);
895 }
896 
897 /* ---------------------------------------------------------------------------------*/
898 PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
899 
900 PetscErrorCode DMCreateMatrix_DA_IS(DM dm,Mat J)
901 {
902   DM_DA                  *da = (DM_DA*)dm->data;
903   Mat                    lJ;
904   ISLocalToGlobalMapping ltog;
905   IS                     is_loc_filt, is_glob;
906   const PetscInt         *e_loc,*idx;
907   PetscInt               nel,nen,nv,dof,dim,*gidx,nb;
908   PetscBool              flg;
909   PetscErrorCode         ierr;
910 
911   /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
912      We need to filter the local indices that are represented through the DMDAGetElements decomposition
913      This is because the size of the local matrices in MATIS is the local size of the l2g map */
914   PetscFunctionBegin;
915   dof  = da->w;
916   dim  = dm->dim;
917 
918   ierr = MatSetBlockSize(J,dof);CHKERRQ(ierr);
919 
920   /* get local elements indices in local DMDA numbering */
921   ierr = DMDAGetElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
922   ierr = ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nel*nen,e_loc,PETSC_COPY_VALUES,&is_loc_filt);CHKERRQ(ierr);
923   ierr = DMDARestoreElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr);
924 
925   /* obtain a consistent local ordering for MATIS */
926   ierr = ISSortRemoveDups(is_loc_filt);CHKERRQ(ierr);
927   ierr = ISBlockGetLocalSize(is_loc_filt,&nb);CHKERRQ(ierr);
928   ierr = DMGetLocalToGlobalMapping(dm,&ltog);CHKERRQ(ierr);
929   ierr = ISLocalToGlobalMappingGetSize(ltog,&nv);CHKERRQ(ierr);
930   ierr = PetscMalloc1(PetscMax(nb,nv/dof),&gidx);CHKERRQ(ierr);
931   ierr = ISBlockGetIndices(is_loc_filt,&idx);CHKERRQ(ierr);
932   ierr = ISLocalToGlobalMappingApplyBlock(ltog,nb,idx,gidx);CHKERRQ(ierr);
933   ierr = ISBlockRestoreIndices(is_loc_filt,&idx);CHKERRQ(ierr);
934   ierr = ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nb,gidx,PETSC_USE_POINTER,&is_glob);CHKERRQ(ierr);
935   ierr = ISLocalToGlobalMappingCreateIS(is_glob,&ltog);CHKERRQ(ierr);
936   ierr = ISDestroy(&is_glob);CHKERRQ(ierr);
937   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
938   ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
939 
940   /* We also attach a l2g map to the local matrices to have MatSetValueLocal to work */
941   ierr = MatISGetLocalMat(J,&lJ);CHKERRQ(ierr);
942   ierr = ISLocalToGlobalMappingCreateIS(is_loc_filt,&ltog);CHKERRQ(ierr);
943   ierr = ISDestroy(&is_loc_filt);CHKERRQ(ierr);
944   ierr = ISCreateStride(PetscObjectComm((PetscObject)lJ),nv/dof,0,1,&is_glob);CHKERRQ(ierr);
945   ierr = ISGetIndices(is_glob,&idx);CHKERRQ(ierr);
946   ierr = ISGlobalToLocalMappingApplyBlock(ltog,IS_GTOLM_MASK,nv/dof,idx,&nb,gidx);CHKERRQ(ierr);
947   ierr = ISRestoreIndices(is_glob,&idx);CHKERRQ(ierr);
948   ierr = ISDestroy(&is_glob);CHKERRQ(ierr);
949   ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
950   ierr = ISCreateBlock(PETSC_COMM_SELF,dof,nb,gidx,PETSC_USE_POINTER,&is_loc_filt);CHKERRQ(ierr);
951   ierr = ISLocalToGlobalMappingCreateIS(is_loc_filt,&ltog);CHKERRQ(ierr);
952   ierr = ISDestroy(&is_loc_filt);CHKERRQ(ierr);
953   ierr = MatSetLocalToGlobalMapping(lJ,ltog,ltog);CHKERRQ(ierr);
954   ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
955   ierr = PetscFree(gidx);CHKERRQ(ierr);
956 
957   /* Preallocation (not exact): we reuse the preallocation routines of the assembled version  */
958   flg = dm->prealloc_only;
959   dm->prealloc_only = PETSC_TRUE;
960   switch (dim) {
961   case 1:
962     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr);
963     ierr = DMCreateMatrix_DA_1d_MPIAIJ(dm,J);CHKERRQ(ierr);
964     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
965     break;
966   case 2:
967     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr);
968     ierr = DMCreateMatrix_DA_2d_MPIAIJ(dm,J);CHKERRQ(ierr);
969     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
970     break;
971   case 3:
972     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr);
973     ierr = DMCreateMatrix_DA_3d_MPIAIJ(dm,J);CHKERRQ(ierr);
974     ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
975     break;
976   default:
977     SETERRQ1(PetscObjectComm((PetscObject)dm),PETSC_ERR_SUP,"Unhandled dimension %d",dim);
978     break;
979   }
980   dm->prealloc_only = flg;
981   PetscFunctionReturn(0);
982 }
983 
984 PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da,Mat J)
985 {
986   PetscErrorCode         ierr;
987   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p;
988   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
989   MPI_Comm               comm;
990   PetscScalar            *values;
991   DMBoundaryType         bx,by;
992   ISLocalToGlobalMapping ltog;
993   DMDAStencilType        st;
994 
995   PetscFunctionBegin;
996   /*
997          nc - number of components per grid point
998          col - number of colors needed in one direction for single component problem
999 
1000   */
1001   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1002   col  = 2*s + 1;
1003   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1004   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1005   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1006 
1007   ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr);
1008   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1009 
1010   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1011   /* determine the matrix preallocation information */
1012   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
1013   for (i=xs; i<xs+nx; i++) {
1014 
1015     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1016     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1017 
1018     for (j=ys; j<ys+ny; j++) {
1019       slot = i - gxs + gnx*(j - gys);
1020 
1021       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1022       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1023 
1024       cnt = 0;
1025       for (k=0; k<nc; k++) {
1026         for (l=lstart; l<lend+1; l++) {
1027           for (p=pstart; p<pend+1; p++) {
1028             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1029               cols[cnt++] = k + nc*(slot + gnx*l + p);
1030             }
1031           }
1032         }
1033         rows[k] = k + nc*(slot);
1034       }
1035       ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1036     }
1037   }
1038   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1039   ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1040   ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1041   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1042 
1043   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1044 
1045   /*
1046     For each node in the grid: we get the neighbors in the local (on processor ordering
1047     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1048     PETSc ordering.
1049   */
1050   if (!da->prealloc_only) {
1051     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
1052     for (i=xs; i<xs+nx; i++) {
1053 
1054       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1055       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1056 
1057       for (j=ys; j<ys+ny; j++) {
1058         slot = i - gxs + gnx*(j - gys);
1059 
1060         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1061         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1062 
1063         cnt = 0;
1064         for (k=0; k<nc; k++) {
1065           for (l=lstart; l<lend+1; l++) {
1066             for (p=pstart; p<pend+1; p++) {
1067               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1068                 cols[cnt++] = k + nc*(slot + gnx*l + p);
1069               }
1070             }
1071           }
1072           rows[k] = k + nc*(slot);
1073         }
1074         ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1075       }
1076     }
1077     ierr = PetscFree(values);CHKERRQ(ierr);
1078     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1079     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1080     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1081   }
1082   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1083   PetscFunctionReturn(0);
1084 }
1085 
1086 PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da,Mat J)
1087 {
1088   PetscErrorCode         ierr;
1089   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1090   PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
1091   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
1092   MPI_Comm               comm;
1093   PetscScalar            *values;
1094   DMBoundaryType         bx,by,bz;
1095   ISLocalToGlobalMapping ltog;
1096   DMDAStencilType        st;
1097 
1098   PetscFunctionBegin;
1099   /*
1100          nc - number of components per grid point
1101          col - number of colors needed in one direction for single component problem
1102 
1103   */
1104   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1105   col  = 2*s + 1;
1106   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1107   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1108   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1109 
1110   ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr);
1111   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1112 
1113   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1114   /* determine the matrix preallocation information */
1115   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1116   for (i=xs; i<xs+nx; i++) {
1117     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1118     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1119     for (j=ys; j<ys+ny; j++) {
1120       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1121       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1122       for (k=zs; k<zs+nz; k++) {
1123         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1124         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1125 
1126         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1127 
1128         cnt = 0;
1129         for (l=0; l<nc; l++) {
1130           for (ii=istart; ii<iend+1; ii++) {
1131             for (jj=jstart; jj<jend+1; jj++) {
1132               for (kk=kstart; kk<kend+1; kk++) {
1133                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1134                   cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1135                 }
1136               }
1137             }
1138           }
1139           rows[l] = l + nc*(slot);
1140         }
1141         ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1142       }
1143     }
1144   }
1145   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1146   ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1147   ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1148   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1149   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1150 
1151   /*
1152     For each node in the grid: we get the neighbors in the local (on processor ordering
1153     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1154     PETSc ordering.
1155   */
1156   if (!da->prealloc_only) {
1157     ierr = PetscCalloc1(col*col*col*nc*nc*nc,&values);CHKERRQ(ierr);
1158     for (i=xs; i<xs+nx; i++) {
1159       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1160       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1161       for (j=ys; j<ys+ny; j++) {
1162         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1163         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1164         for (k=zs; k<zs+nz; k++) {
1165           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1166           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1167 
1168           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1169 
1170           cnt = 0;
1171           for (l=0; l<nc; l++) {
1172             for (ii=istart; ii<iend+1; ii++) {
1173               for (jj=jstart; jj<jend+1; jj++) {
1174                 for (kk=kstart; kk<kend+1; kk++) {
1175                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1176                     cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1177                   }
1178                 }
1179               }
1180             }
1181             rows[l] = l + nc*(slot);
1182           }
1183           ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1184         }
1185       }
1186     }
1187     ierr = PetscFree(values);CHKERRQ(ierr);
1188     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1189     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1190     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1191   }
1192   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1193   PetscFunctionReturn(0);
1194 }
1195 
1196 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J)
1197 {
1198   PetscErrorCode         ierr;
1199   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,M,N;
1200   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
1201   MPI_Comm               comm;
1202   PetscScalar            *values;
1203   DMBoundaryType         bx,by;
1204   ISLocalToGlobalMapping ltog,mltog;
1205   DMDAStencilType        st;
1206   PetscBool              removedups = PETSC_FALSE;
1207 
1208   PetscFunctionBegin;
1209   /*
1210          nc - number of components per grid point
1211          col - number of colors needed in one direction for single component problem
1212 
1213   */
1214   ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1215   col  = 2*s + 1;
1216   /*
1217        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1218        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1219   */
1220   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1221   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1222   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1223   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1224   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1225 
1226   ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr);
1227   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1228 
1229   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1230   /* determine the matrix preallocation information */
1231   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
1232   for (i=xs; i<xs+nx; i++) {
1233 
1234     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1235     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1236 
1237     for (j=ys; j<ys+ny; j++) {
1238       slot = i - gxs + gnx*(j - gys);
1239 
1240       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1241       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1242 
1243       cnt = 0;
1244       for (k=0; k<nc; k++) {
1245         for (l=lstart; l<lend+1; l++) {
1246           for (p=pstart; p<pend+1; p++) {
1247             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1248               cols[cnt++] = k + nc*(slot + gnx*l + p);
1249             }
1250           }
1251         }
1252         rows[k] = k + nc*(slot);
1253       }
1254       if (removedups) {
1255         ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1256       } else {
1257         ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1258       }
1259     }
1260   }
1261   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1262   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1263   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1264   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1265   ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
1266   if (!mltog) {
1267     ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1268   }
1269 
1270   /*
1271     For each node in the grid: we get the neighbors in the local (on processor ordering
1272     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1273     PETSc ordering.
1274   */
1275   if (!da->prealloc_only) {
1276     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
1277     for (i=xs; i<xs+nx; i++) {
1278 
1279       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1280       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1281 
1282       for (j=ys; j<ys+ny; j++) {
1283         slot = i - gxs + gnx*(j - gys);
1284 
1285         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1286         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1287 
1288         cnt = 0;
1289         for (k=0; k<nc; k++) {
1290           for (l=lstart; l<lend+1; l++) {
1291             for (p=pstart; p<pend+1; p++) {
1292               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1293                 cols[cnt++] = k + nc*(slot + gnx*l + p);
1294               }
1295             }
1296           }
1297           rows[k] = k + nc*(slot);
1298         }
1299         ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1300       }
1301     }
1302     ierr = PetscFree(values);CHKERRQ(ierr);
1303     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1304     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1305     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1306   }
1307   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1308   PetscFunctionReturn(0);
1309 }
1310 
1311 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)
1312 {
1313   PetscErrorCode         ierr;
1314   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1315   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,maxcnt = 0,l,p,M,N;
1316   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
1317   DM_DA                  *dd = (DM_DA*)da->data;
1318   PetscInt               ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
1319   MPI_Comm               comm;
1320   PetscScalar            *values;
1321   DMBoundaryType         bx,by;
1322   ISLocalToGlobalMapping ltog;
1323   DMDAStencilType        st;
1324   PetscBool              removedups = PETSC_FALSE;
1325 
1326   PetscFunctionBegin;
1327   /*
1328          nc - number of components per grid point
1329          col - number of colors needed in one direction for single component problem
1330 
1331   */
1332   ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1333   col  = 2*s + 1;
1334   /*
1335        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1336        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1337   */
1338   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1339   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1340   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1341   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1342   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1343 
1344   ierr = PetscMalloc1(col*col*nc,&cols);CHKERRQ(ierr);
1345   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1346 
1347   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1348   /* determine the matrix preallocation information */
1349   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
1350   for (i=xs; i<xs+nx; i++) {
1351 
1352     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1353     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1354 
1355     for (j=ys; j<ys+ny; j++) {
1356       slot = i - gxs + gnx*(j - gys);
1357 
1358       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1359       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1360 
1361       for (k=0; k<nc; k++) {
1362         cnt = 0;
1363         for (l=lstart; l<lend+1; l++) {
1364           for (p=pstart; p<pend+1; p++) {
1365             if (l || p) {
1366               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
1367                 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
1368               }
1369             } else {
1370               if (dfill) {
1371                 for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1372               } else {
1373                 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1374               }
1375             }
1376           }
1377         }
1378         row    = k + nc*(slot);
1379         maxcnt = PetscMax(maxcnt,cnt);
1380         if (removedups) {
1381           ierr   = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1382         } else {
1383           ierr   = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1384         }
1385       }
1386     }
1387   }
1388   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1389   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1390   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1391   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1392 
1393   /*
1394     For each node in the grid: we get the neighbors in the local (on processor ordering
1395     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1396     PETSc ordering.
1397   */
1398   if (!da->prealloc_only) {
1399     ierr = PetscCalloc1(maxcnt,&values);CHKERRQ(ierr);
1400     for (i=xs; i<xs+nx; i++) {
1401 
1402       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1403       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1404 
1405       for (j=ys; j<ys+ny; j++) {
1406         slot = i - gxs + gnx*(j - gys);
1407 
1408         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1409         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1410 
1411         for (k=0; k<nc; k++) {
1412           cnt = 0;
1413           for (l=lstart; l<lend+1; l++) {
1414             for (p=pstart; p<pend+1; p++) {
1415               if (l || p) {
1416                 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
1417                   for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
1418                 }
1419               } else {
1420                 if (dfill) {
1421                   for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1422                 } else {
1423                   for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1424                 }
1425               }
1426             }
1427           }
1428           row  = k + nc*(slot);
1429           ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1430         }
1431       }
1432     }
1433     ierr = PetscFree(values);CHKERRQ(ierr);
1434     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1435     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1436     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1437   }
1438   ierr = PetscFree(cols);CHKERRQ(ierr);
1439   PetscFunctionReturn(0);
1440 }
1441 
1442 /* ---------------------------------------------------------------------------------*/
1443 
1444 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J)
1445 {
1446   PetscErrorCode         ierr;
1447   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1448   PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
1449   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
1450   MPI_Comm               comm;
1451   PetscScalar            *values;
1452   DMBoundaryType         bx,by,bz;
1453   ISLocalToGlobalMapping ltog,mltog;
1454   DMDAStencilType        st;
1455   PetscBool              removedups = PETSC_FALSE;
1456 
1457   PetscFunctionBegin;
1458   /*
1459          nc - number of components per grid point
1460          col - number of colors needed in one direction for single component problem
1461 
1462   */
1463   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1464   col  = 2*s + 1;
1465 
1466   /*
1467        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1468        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1469   */
1470   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1471   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1472   if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;
1473 
1474   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1475   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1476   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1477 
1478   ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr);
1479   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1480 
1481   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1482   /* determine the matrix preallocation information */
1483   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1484   for (i=xs; i<xs+nx; i++) {
1485     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1486     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1487     for (j=ys; j<ys+ny; j++) {
1488       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1489       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1490       for (k=zs; k<zs+nz; k++) {
1491         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1492         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1493 
1494         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1495 
1496         cnt = 0;
1497         for (l=0; l<nc; l++) {
1498           for (ii=istart; ii<iend+1; ii++) {
1499             for (jj=jstart; jj<jend+1; jj++) {
1500               for (kk=kstart; kk<kend+1; kk++) {
1501                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1502                   cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1503                 }
1504               }
1505             }
1506           }
1507           rows[l] = l + nc*(slot);
1508         }
1509         if (removedups) {
1510           ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1511         } else {
1512           ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1513         }
1514       }
1515     }
1516   }
1517   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1518   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1519   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1520   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1521   ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
1522   if (!mltog) {
1523     ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1524   }
1525 
1526   /*
1527     For each node in the grid: we get the neighbors in the local (on processor ordering
1528     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1529     PETSc ordering.
1530   */
1531   if (!da->prealloc_only) {
1532     ierr = PetscCalloc1(col*col*col*nc*nc*nc,&values);CHKERRQ(ierr);
1533     for (i=xs; i<xs+nx; i++) {
1534       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1535       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1536       for (j=ys; j<ys+ny; j++) {
1537         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1538         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1539         for (k=zs; k<zs+nz; k++) {
1540           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1541           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1542 
1543           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1544 
1545           cnt = 0;
1546           for (l=0; l<nc; l++) {
1547             for (ii=istart; ii<iend+1; ii++) {
1548               for (jj=jstart; jj<jend+1; jj++) {
1549                 for (kk=kstart; kk<kend+1; kk++) {
1550                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1551                     cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1552                   }
1553                 }
1554               }
1555             }
1556             rows[l] = l + nc*(slot);
1557           }
1558           ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1559         }
1560       }
1561     }
1562     ierr = PetscFree(values);CHKERRQ(ierr);
1563     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1564     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1565     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1566   }
1567   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1568   PetscFunctionReturn(0);
1569 }
1570 
1571 /* ---------------------------------------------------------------------------------*/
1572 
1573 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J)
1574 {
1575   PetscErrorCode         ierr;
1576   DM_DA                  *dd = (DM_DA*)da->data;
1577   PetscInt               xs,nx,i,j,gxs,gnx,row,k,l;
1578   PetscInt               m,dim,s,*cols = NULL,nc,cnt,maxcnt = 0,*ocols;
1579   PetscInt               *ofill = dd->ofill,*dfill = dd->dfill;
1580   PetscScalar            *values;
1581   DMBoundaryType         bx;
1582   ISLocalToGlobalMapping ltog;
1583   PetscMPIInt            rank,size;
1584 
1585   PetscFunctionBegin;
1586   if (dd->bx == DM_BOUNDARY_PERIODIC) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"With fill provided not implemented with periodic boundary conditions");
1587   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)da),&rank);CHKERRQ(ierr);
1588   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRQ(ierr);
1589 
1590   /*
1591          nc - number of components per grid point
1592 
1593   */
1594   ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
1595   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
1596   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
1597 
1598   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1599   ierr = PetscCalloc2(nx*nc,&cols,nx*nc,&ocols);CHKERRQ(ierr);
1600 
1601   /*
1602         note should be smaller for first and last process with no periodic
1603         does not handle dfill
1604   */
1605   cnt = 0;
1606   /* coupling with process to the left */
1607   for (i=0; i<s; i++) {
1608     for (j=0; j<nc; j++) {
1609       ocols[cnt] = ((!rank) ? 0 : (s - i)*(ofill[j+1] - ofill[j]));
1610       cols[cnt]  = dfill[j+1] - dfill[j] + (s + i)*(ofill[j+1] - ofill[j]);
1611       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1612       cnt++;
1613     }
1614   }
1615   for (i=s; i<nx-s; i++) {
1616     for (j=0; j<nc; j++) {
1617       cols[cnt] = dfill[j+1] - dfill[j] + 2*s*(ofill[j+1] - ofill[j]);
1618       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1619       cnt++;
1620     }
1621   }
1622   /* coupling with process to the right */
1623   for (i=nx-s; i<nx; i++) {
1624     for (j=0; j<nc; j++) {
1625       ocols[cnt] = ((rank == (size-1)) ? 0 : (i - nx + s + 1)*(ofill[j+1] - ofill[j]));
1626       cols[cnt]  = dfill[j+1] - dfill[j] + (s + nx - i - 1)*(ofill[j+1] - ofill[j]);
1627       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1628       cnt++;
1629     }
1630   }
1631 
1632   ierr = MatSeqAIJSetPreallocation(J,0,cols);CHKERRQ(ierr);
1633   ierr = MatMPIAIJSetPreallocation(J,0,cols,0,ocols);CHKERRQ(ierr);
1634   ierr = PetscFree2(cols,ocols);CHKERRQ(ierr);
1635 
1636   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1637   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1638 
1639   /*
1640     For each node in the grid: we get the neighbors in the local (on processor ordering
1641     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1642     PETSc ordering.
1643   */
1644   if (!da->prealloc_only) {
1645     ierr = PetscCalloc2(maxcnt,&values,maxcnt,&cols);CHKERRQ(ierr);
1646 
1647     row = xs*nc;
1648     /* coupling with process to the left */
1649     for (i=xs; i<xs+s; i++) {
1650       for (j=0; j<nc; j++) {
1651         cnt = 0;
1652         if (rank) {
1653           for (l=0; l<s; l++) {
1654             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1655           }
1656         }
1657         if (dfill) {
1658           for (k=dfill[j]; k<dfill[j+1]; k++) {
1659             cols[cnt++] = i*nc + dfill[k];
1660           }
1661         } else {
1662           for (k=0; k<nc; k++) {
1663             cols[cnt++] = i*nc + k;
1664           }
1665         }
1666         for (l=0; l<s; l++) {
1667           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1668         }
1669         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1670         row++;
1671       }
1672     }
1673     for (i=xs+s; i<xs+nx-s; i++) {
1674       for (j=0; j<nc; j++) {
1675         cnt = 0;
1676         for (l=0; l<s; l++) {
1677           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1678         }
1679         if (dfill) {
1680           for (k=dfill[j]; k<dfill[j+1]; k++) {
1681             cols[cnt++] = i*nc + dfill[k];
1682           }
1683         } else {
1684           for (k=0; k<nc; k++) {
1685             cols[cnt++] = i*nc + k;
1686           }
1687         }
1688         for (l=0; l<s; l++) {
1689           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1690         }
1691         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1692         row++;
1693       }
1694     }
1695     /* coupling with process to the right */
1696     for (i=xs+nx-s; i<xs+nx; i++) {
1697       for (j=0; j<nc; j++) {
1698         cnt = 0;
1699         for (l=0; l<s; l++) {
1700           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1701         }
1702         if (dfill) {
1703           for (k=dfill[j]; k<dfill[j+1]; k++) {
1704             cols[cnt++] = i*nc + dfill[k];
1705           }
1706         } else {
1707           for (k=0; k<nc; k++) {
1708             cols[cnt++] = i*nc + k;
1709           }
1710         }
1711         if (rank < size-1) {
1712           for (l=0; l<s; l++) {
1713             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1714           }
1715         }
1716         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1717         row++;
1718       }
1719     }
1720     ierr = PetscFree2(values,cols);CHKERRQ(ierr);
1721     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1722     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1723     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1724   }
1725   PetscFunctionReturn(0);
1726 }
1727 
1728 /* ---------------------------------------------------------------------------------*/
1729 
1730 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J)
1731 {
1732   PetscErrorCode         ierr;
1733   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1734   PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
1735   PetscInt               istart,iend;
1736   PetscScalar            *values;
1737   DMBoundaryType         bx;
1738   ISLocalToGlobalMapping ltog,mltog;
1739 
1740   PetscFunctionBegin;
1741   /*
1742          nc - number of components per grid point
1743          col - number of colors needed in one direction for single component problem
1744 
1745   */
1746   ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
1747   col  = 2*s + 1;
1748 
1749   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
1750   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
1751 
1752   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1753   ierr = MatSeqAIJSetPreallocation(J,col*nc,0);CHKERRQ(ierr);
1754   ierr = MatMPIAIJSetPreallocation(J,col*nc,0,col*nc,0);CHKERRQ(ierr);
1755 
1756   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1757   ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
1758   if (!mltog) {
1759     ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1760   }
1761 
1762   /*
1763     For each node in the grid: we get the neighbors in the local (on processor ordering
1764     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1765     PETSc ordering.
1766   */
1767   if (!da->prealloc_only) {
1768     ierr = PetscMalloc2(nc,&rows,col*nc*nc,&cols);CHKERRQ(ierr);
1769     ierr = PetscCalloc1(col*nc*nc,&values);CHKERRQ(ierr);
1770     for (i=xs; i<xs+nx; i++) {
1771       istart = PetscMax(-s,gxs - i);
1772       iend   = PetscMin(s,gxs + gnx - i - 1);
1773       slot   = i - gxs;
1774 
1775       cnt = 0;
1776       for (l=0; l<nc; l++) {
1777         for (i1=istart; i1<iend+1; i1++) {
1778           cols[cnt++] = l + nc*(slot + i1);
1779         }
1780         rows[l] = l + nc*(slot);
1781       }
1782       ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1783     }
1784     ierr = PetscFree(values);CHKERRQ(ierr);
1785     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1786     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1787     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1788     ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1789   }
1790   PetscFunctionReturn(0);
1791 }
1792 
1793 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
1794 {
1795   PetscErrorCode         ierr;
1796   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1797   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1798   PetscInt               istart,iend,jstart,jend,ii,jj;
1799   MPI_Comm               comm;
1800   PetscScalar            *values;
1801   DMBoundaryType         bx,by;
1802   DMDAStencilType        st;
1803   ISLocalToGlobalMapping ltog;
1804 
1805   PetscFunctionBegin;
1806   /*
1807      nc - number of components per grid point
1808      col - number of colors needed in one direction for single component problem
1809   */
1810   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1811   col  = 2*s + 1;
1812 
1813   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1814   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1815   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1816 
1817   ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr);
1818 
1819   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1820 
1821   /* determine the matrix preallocation information */
1822   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1823   for (i=xs; i<xs+nx; i++) {
1824     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1825     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1826     for (j=ys; j<ys+ny; j++) {
1827       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1828       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1829       slot   = i - gxs + gnx*(j - gys);
1830 
1831       /* Find block columns in block row */
1832       cnt = 0;
1833       for (ii=istart; ii<iend+1; ii++) {
1834         for (jj=jstart; jj<jend+1; jj++) {
1835           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1836             cols[cnt++] = slot + ii + gnx*jj;
1837           }
1838         }
1839       }
1840       ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1841     }
1842   }
1843   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1844   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1845   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1846 
1847   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1848 
1849   /*
1850     For each node in the grid: we get the neighbors in the local (on processor ordering
1851     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1852     PETSc ordering.
1853   */
1854   if (!da->prealloc_only) {
1855     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
1856     for (i=xs; i<xs+nx; i++) {
1857       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1858       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1859       for (j=ys; j<ys+ny; j++) {
1860         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1861         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1862         slot = i - gxs + gnx*(j - gys);
1863         cnt  = 0;
1864         for (ii=istart; ii<iend+1; ii++) {
1865           for (jj=jstart; jj<jend+1; jj++) {
1866             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1867               cols[cnt++] = slot + ii + gnx*jj;
1868             }
1869           }
1870         }
1871         ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1872       }
1873     }
1874     ierr = PetscFree(values);CHKERRQ(ierr);
1875     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1876     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1877     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1878   }
1879   ierr = PetscFree(cols);CHKERRQ(ierr);
1880   PetscFunctionReturn(0);
1881 }
1882 
1883 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
1884 {
1885   PetscErrorCode         ierr;
1886   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1887   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1888   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1889   MPI_Comm               comm;
1890   PetscScalar            *values;
1891   DMBoundaryType         bx,by,bz;
1892   DMDAStencilType        st;
1893   ISLocalToGlobalMapping ltog;
1894 
1895   PetscFunctionBegin;
1896   /*
1897          nc - number of components per grid point
1898          col - number of colors needed in one direction for single component problem
1899 
1900   */
1901   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1902   col  = 2*s + 1;
1903 
1904   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1905   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1906   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1907 
1908   ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr);
1909 
1910   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1911 
1912   /* determine the matrix preallocation information */
1913   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1914   for (i=xs; i<xs+nx; i++) {
1915     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1916     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1917     for (j=ys; j<ys+ny; j++) {
1918       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1919       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1920       for (k=zs; k<zs+nz; k++) {
1921         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1922         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1923 
1924         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1925 
1926         /* Find block columns in block row */
1927         cnt = 0;
1928         for (ii=istart; ii<iend+1; ii++) {
1929           for (jj=jstart; jj<jend+1; jj++) {
1930             for (kk=kstart; kk<kend+1; kk++) {
1931               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1932                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1933               }
1934             }
1935           }
1936         }
1937         ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1938       }
1939     }
1940   }
1941   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1942   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1943   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1944 
1945   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1946 
1947   /*
1948     For each node in the grid: we get the neighbors in the local (on processor ordering
1949     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1950     PETSc ordering.
1951   */
1952   if (!da->prealloc_only) {
1953     ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr);
1954     for (i=xs; i<xs+nx; i++) {
1955       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1956       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1957       for (j=ys; j<ys+ny; j++) {
1958         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1959         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1960         for (k=zs; k<zs+nz; k++) {
1961           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1962           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1963 
1964           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1965 
1966           cnt = 0;
1967           for (ii=istart; ii<iend+1; ii++) {
1968             for (jj=jstart; jj<jend+1; jj++) {
1969               for (kk=kstart; kk<kend+1; kk++) {
1970                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1971                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1972                 }
1973               }
1974             }
1975           }
1976           ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1977         }
1978       }
1979     }
1980     ierr = PetscFree(values);CHKERRQ(ierr);
1981     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1982     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1983     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1984   }
1985   ierr = PetscFree(cols);CHKERRQ(ierr);
1986   PetscFunctionReturn(0);
1987 }
1988 
1989 /*
1990   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
1991   identify in the local ordering with periodic domain.
1992 */
1993 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[])
1994 {
1995   PetscErrorCode ierr;
1996   PetscInt       i,n;
1997 
1998   PetscFunctionBegin;
1999   ierr = ISLocalToGlobalMappingApplyBlock(ltog,1,row,row);CHKERRQ(ierr);
2000   ierr = ISLocalToGlobalMappingApplyBlock(ltog,*cnt,col,col);CHKERRQ(ierr);
2001   for (i=0,n=0; i<*cnt; i++) {
2002     if (col[i] >= *row) col[n++] = col[i];
2003   }
2004   *cnt = n;
2005   PetscFunctionReturn(0);
2006 }
2007 
2008 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J)
2009 {
2010   PetscErrorCode         ierr;
2011   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2012   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
2013   PetscInt               istart,iend,jstart,jend,ii,jj;
2014   MPI_Comm               comm;
2015   PetscScalar            *values;
2016   DMBoundaryType         bx,by;
2017   DMDAStencilType        st;
2018   ISLocalToGlobalMapping ltog;
2019 
2020   PetscFunctionBegin;
2021   /*
2022      nc - number of components per grid point
2023      col - number of colors needed in one direction for single component problem
2024   */
2025   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
2026   col  = 2*s + 1;
2027 
2028   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
2029   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
2030   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
2031 
2032   ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr);
2033 
2034   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
2035 
2036   /* determine the matrix preallocation information */
2037   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
2038   for (i=xs; i<xs+nx; i++) {
2039     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2040     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2041     for (j=ys; j<ys+ny; j++) {
2042       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2043       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2044       slot   = i - gxs + gnx*(j - gys);
2045 
2046       /* Find block columns in block row */
2047       cnt = 0;
2048       for (ii=istart; ii<iend+1; ii++) {
2049         for (jj=jstart; jj<jend+1; jj++) {
2050           if (st == DMDA_STENCIL_BOX || !ii || !jj) {
2051             cols[cnt++] = slot + ii + gnx*jj;
2052           }
2053         }
2054       }
2055       ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2056       ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
2057     }
2058   }
2059   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
2060   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
2061   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2062 
2063   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
2064 
2065   /*
2066     For each node in the grid: we get the neighbors in the local (on processor ordering
2067     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2068     PETSc ordering.
2069   */
2070   if (!da->prealloc_only) {
2071     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
2072     for (i=xs; i<xs+nx; i++) {
2073       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2074       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2075       for (j=ys; j<ys+ny; j++) {
2076         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2077         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2078         slot   = i - gxs + gnx*(j - gys);
2079 
2080         /* Find block columns in block row */
2081         cnt = 0;
2082         for (ii=istart; ii<iend+1; ii++) {
2083           for (jj=jstart; jj<jend+1; jj++) {
2084             if (st == DMDA_STENCIL_BOX || !ii || !jj) {
2085               cols[cnt++] = slot + ii + gnx*jj;
2086             }
2087           }
2088         }
2089         ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2090         ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2091       }
2092     }
2093     ierr = PetscFree(values);CHKERRQ(ierr);
2094     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2095     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2096     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2097   }
2098   ierr = PetscFree(cols);CHKERRQ(ierr);
2099   PetscFunctionReturn(0);
2100 }
2101 
2102 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J)
2103 {
2104   PetscErrorCode         ierr;
2105   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2106   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
2107   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
2108   MPI_Comm               comm;
2109   PetscScalar            *values;
2110   DMBoundaryType         bx,by,bz;
2111   DMDAStencilType        st;
2112   ISLocalToGlobalMapping ltog;
2113 
2114   PetscFunctionBegin;
2115   /*
2116      nc - number of components per grid point
2117      col - number of colors needed in one direction for single component problem
2118   */
2119   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
2120   col  = 2*s + 1;
2121 
2122   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
2123   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
2124   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
2125 
2126   /* create the matrix */
2127   ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr);
2128 
2129   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
2130 
2131   /* determine the matrix preallocation information */
2132   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
2133   for (i=xs; i<xs+nx; i++) {
2134     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2135     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2136     for (j=ys; j<ys+ny; j++) {
2137       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2138       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2139       for (k=zs; k<zs+nz; k++) {
2140         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2141         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2142 
2143         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2144 
2145         /* Find block columns in block row */
2146         cnt = 0;
2147         for (ii=istart; ii<iend+1; ii++) {
2148           for (jj=jstart; jj<jend+1; jj++) {
2149             for (kk=kstart; kk<kend+1; kk++) {
2150               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
2151                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
2152               }
2153             }
2154           }
2155         }
2156         ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2157         ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
2158       }
2159     }
2160   }
2161   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
2162   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
2163   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2164 
2165   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
2166 
2167   /*
2168     For each node in the grid: we get the neighbors in the local (on processor ordering
2169     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2170     PETSc ordering.
2171   */
2172   if (!da->prealloc_only) {
2173     ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr);
2174     for (i=xs; i<xs+nx; i++) {
2175       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2176       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2177       for (j=ys; j<ys+ny; j++) {
2178         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2179         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2180         for (k=zs; k<zs+nz; k++) {
2181           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2182           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2183 
2184           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2185 
2186           cnt = 0;
2187           for (ii=istart; ii<iend+1; ii++) {
2188             for (jj=jstart; jj<jend+1; jj++) {
2189               for (kk=kstart; kk<kend+1; kk++) {
2190                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
2191                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
2192                 }
2193               }
2194             }
2195           }
2196           ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2197           ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2198         }
2199       }
2200     }
2201     ierr = PetscFree(values);CHKERRQ(ierr);
2202     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2203     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2204     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2205   }
2206   ierr = PetscFree(cols);CHKERRQ(ierr);
2207   PetscFunctionReturn(0);
2208 }
2209 
2210 /* ---------------------------------------------------------------------------------*/
2211 
2212 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J)
2213 {
2214   PetscErrorCode         ierr;
2215   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2216   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt, maxcnt = 0,l,p,*dnz,*onz;
2217   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
2218   DM_DA                  *dd = (DM_DA*)da->data;
2219   PetscInt               ifill_col,*dfill = dd->dfill,*ofill = dd->ofill;
2220   MPI_Comm               comm;
2221   PetscScalar            *values;
2222   DMBoundaryType         bx,by,bz;
2223   ISLocalToGlobalMapping ltog;
2224   DMDAStencilType        st;
2225   PetscBool              removedups = PETSC_FALSE;
2226 
2227   PetscFunctionBegin;
2228   /*
2229          nc - number of components per grid point
2230          col - number of colors needed in one direction for single component problem
2231 
2232   */
2233   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
2234   col  = 2*s + 1;
2235   if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
2236                  by 2*stencil_width + 1\n");
2237   if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
2238                  by 2*stencil_width + 1\n");
2239   if (bz == DM_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
2240                  by 2*stencil_width + 1\n");
2241 
2242   /*
2243        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
2244        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
2245   */
2246   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
2247   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
2248   if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;
2249 
2250   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
2251   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
2252   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
2253 
2254   ierr = PetscMalloc1(col*col*col*nc,&cols);CHKERRQ(ierr);
2255   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
2256 
2257   /* determine the matrix preallocation information */
2258   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
2259 
2260   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
2261   for (i=xs; i<xs+nx; i++) {
2262     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2263     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2264     for (j=ys; j<ys+ny; j++) {
2265       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2266       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2267       for (k=zs; k<zs+nz; k++) {
2268         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2269         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2270 
2271         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2272 
2273         for (l=0; l<nc; l++) {
2274           cnt = 0;
2275           for (ii=istart; ii<iend+1; ii++) {
2276             for (jj=jstart; jj<jend+1; jj++) {
2277               for (kk=kstart; kk<kend+1; kk++) {
2278                 if (ii || jj || kk) {
2279                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2280                     for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2281                   }
2282                 } else {
2283                   if (dfill) {
2284                     for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2285                   } else {
2286                     for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2287                   }
2288                 }
2289               }
2290             }
2291           }
2292           row  = l + nc*(slot);
2293           maxcnt = PetscMax(maxcnt,cnt);
2294           if (removedups) {
2295             ierr = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
2296           } else {
2297             ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
2298           }
2299         }
2300       }
2301     }
2302   }
2303   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
2304   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
2305   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2306   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
2307 
2308   /*
2309     For each node in the grid: we get the neighbors in the local (on processor ordering
2310     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2311     PETSc ordering.
2312   */
2313   if (!da->prealloc_only) {
2314     ierr = PetscCalloc1(maxcnt,&values);CHKERRQ(ierr);
2315     for (i=xs; i<xs+nx; i++) {
2316       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2317       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2318       for (j=ys; j<ys+ny; j++) {
2319         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2320         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2321         for (k=zs; k<zs+nz; k++) {
2322           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2323           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2324 
2325           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2326 
2327           for (l=0; l<nc; l++) {
2328             cnt = 0;
2329             for (ii=istart; ii<iend+1; ii++) {
2330               for (jj=jstart; jj<jend+1; jj++) {
2331                 for (kk=kstart; kk<kend+1; kk++) {
2332                   if (ii || jj || kk) {
2333                     if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2334                       for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2335                     }
2336                   } else {
2337                     if (dfill) {
2338                       for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2339                     } else {
2340                       for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2341                     }
2342                   }
2343                 }
2344               }
2345             }
2346             row  = l + nc*(slot);
2347             ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2348           }
2349         }
2350       }
2351     }
2352     ierr = PetscFree(values);CHKERRQ(ierr);
2353     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2354     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2355     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2356   }
2357   ierr = PetscFree(cols);CHKERRQ(ierr);
2358   PetscFunctionReturn(0);
2359 }
2360